# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102
!IF "$(CFG)" == ""
-CFG=xmlwf - Win32 Debug
-!MESSAGE No configuration specified. Defaulting to xmlwf - Win32 Debug.
+CFG=gennmtab - Win32 Debug
+!MESSAGE No configuration specified. Defaulting to gennmtab - Win32 Debug.
!ENDIF
!IF "$(CFG)" != "xmltok - Win32 Release" && "$(CFG)" != "xmltok - Win32 Debug"\
&& "$(CFG)" != "xmlec - Win32 Release" && "$(CFG)" != "xmlec - Win32 Debug" &&\
- "$(CFG)" != "xmlwf - Win32 Release" && "$(CFG)" != "xmlwf - Win32 Debug"
+ "$(CFG)" != "xmlwf - Win32 Release" && "$(CFG)" != "xmlwf - Win32 Debug" &&\
+ "$(CFG)" != "gennmtab - Win32 Release" && "$(CFG)" != "gennmtab - Win32 Debug"
!MESSAGE Invalid configuration "$(CFG)" specified.
!MESSAGE You can specify a configuration when running NMAKE on this makefile
!MESSAGE by defining the macro CFG on the command line. For example:
!MESSAGE
-!MESSAGE NMAKE /f "xmltok.mak" CFG="xmlwf - Win32 Debug"
+!MESSAGE NMAKE /f "xmltok.mak" CFG="gennmtab - Win32 Debug"
!MESSAGE
!MESSAGE Possible choices for configuration are:
!MESSAGE
!MESSAGE "xmlec - Win32 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE "xmlwf - Win32 Release" (based on "Win32 (x86) Console Application")
!MESSAGE "xmlwf - Win32 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE "gennmtab - Win32 Release" (based on\
+ "Win32 (x86) Console Application")
+!MESSAGE "gennmtab - Win32 Debug" (based on "Win32 (x86) Console Application")
!MESSAGE
!ERROR An invalid configuration is specified.
!ENDIF
!ENDIF
################################################################################
# Begin Project
-# PROP Target_Last_Scanned "xmlwf - Win32 Debug"
+# PROP Target_Last_Scanned "gennmtab - Win32 Debug"
!IF "$(CFG)" == "xmltok - Win32 Release"
OUTDIR=.\Release
INTDIR=.\Release
-ALL : ".\bin\xmltok.dll"
+ALL : "gennmtab - Win32 Release" ".\bin\xmltok.dll"
CLEAN :
-@erase "$(INTDIR)\dllmain.obj"
-@erase "$(OUTDIR)\xmltok.exp"
-@erase "$(OUTDIR)\xmltok.lib"
-@erase ".\bin\xmltok.dll"
+ -@erase ".\nametab.h"
"$(OUTDIR)" :
if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)"
CPP=cl.exe
# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /c
-# ADD CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /YX /c
-CPP_PROJ=/nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS"\
- /Fp"$(INTDIR)/xmltok.pch" /YX /Fo"$(INTDIR)/" /c
+# ADD CPP /nologo /MT /W3 /GX /O2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D XMLTOKAPI=__declspec(dllexport) /YX /c
+CPP_PROJ=/nologo /MT /W3 /GX /O2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D\
+ XMLTOKAPI=__declspec(dllexport) /Fp"$(INTDIR)/xmltok.pch" /YX /Fo"$(INTDIR)/"\
+ /c
CPP_OBJS=.\Release/
CPP_SBRS=.\.
OUTDIR=.\Debug
INTDIR=.\Debug
-ALL : "$(OUTDIR)\xmltok.dll"
+ALL : "gennmtab - Win32 Debug" "$(OUTDIR)\xmltok.dll"
CLEAN :
-@erase "$(INTDIR)\dllmain.obj"
-@erase "$(OUTDIR)\xmltok.ilk"
-@erase "$(OUTDIR)\xmltok.lib"
-@erase "$(OUTDIR)\xmltok.pdb"
+ -@erase ".\nametab.h"
"$(OUTDIR)" :
if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)"
CPP=cl.exe
# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /c
-# ADD CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /YX /c
-CPP_PROJ=/nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS"\
- /Fp"$(INTDIR)/xmltok.pch" /YX /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c
+# ADD CPP /nologo /MTd /W3 /Gm /GX /Zi /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D XMLTOKAPI=__declspec(dllexport) /YX /c
+CPP_PROJ=/nologo /MTd /W3 /Gm /GX /Zi /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS"\
+ /D XMLTOKAPI=__declspec(dllexport) /Fp"$(INTDIR)/xmltok.pch" /YX\
+ /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c
CPP_OBJS=.\Debug/
CPP_SBRS=.\.
CPP=cl.exe
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
-# ADD CPP /nologo /W3 /GX /O2 /Ob2 /I "." /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /YX /c
+# ADD CPP /nologo /W3 /GX /O2 /Ob2 /I "." /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /YX /c
CPP_PROJ=/nologo /ML /W3 /GX /O2 /Ob2 /I "." /D "NDEBUG" /D "WIN32" /D\
- "_CONSOLE" /Fp"$(INTDIR)/xmlec.pch" /YX /Fo"$(INTDIR)/" /c
+ "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /Fp"$(INTDIR)/xmlec.pch" /YX\
+ /Fo"$(INTDIR)/" /c
CPP_OBJS=.\xmlec\Release/
CPP_SBRS=.\.
CPP=cl.exe
# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c
-# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /I "." /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /YX /c
+# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /I "." /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /YX /c
CPP_PROJ=/nologo /MLd /W3 /Gm /GX /Zi /Od /I "." /D "_DEBUG" /D "WIN32" /D\
- "_CONSOLE" /Fp"$(INTDIR)/xmlec.pch" /YX /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c
+ "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /Fp"$(INTDIR)/xmlec.pch" /YX\
+ /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c
CPP_OBJS=.\xmlec\Debug/
CPP_SBRS=.\.
ALL : "xmltok - Win32 Release" ".\bin\xmlwf.exe"
CLEAN :
+ -@erase "$(INTDIR)\hashtable.obj"
-@erase "$(INTDIR)\wfcheck.obj"
+ -@erase "$(INTDIR)\wfcheckmessage.obj"
-@erase "$(INTDIR)\win32filemap.obj"
-@erase "$(INTDIR)\xmlwf.obj"
-@erase ".\bin\xmlwf.exe"
CPP=cl.exe
# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
-# ADD CPP /nologo /W3 /GX /O2 /I "." /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
-CPP_PROJ=/nologo /ML /W3 /GX /O2 /I "." /D "WIN32" /D "NDEBUG" /D "_CONSOLE"\
- /Fp"$(INTDIR)/xmlwf.pch" /YX /Fo"$(INTDIR)/" /c
+# ADD CPP /nologo /W3 /GX /O2 /I "." /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /YX /c
+CPP_PROJ=/nologo /ML /W3 /GX /O2 /I "." /D "NDEBUG" /D "WIN32" /D "_CONSOLE" /D\
+ XMLTOKAPI=__declspec(dllimport) /Fp"$(INTDIR)/xmlwf.pch" /YX /Fo"$(INTDIR)/" /c\
+
CPP_OBJS=.\xmlwf\Release/
CPP_SBRS=.\.
odbc32.lib odbccp32.lib /nologo /subsystem:console /incremental:no\
/pdb:"$(OUTDIR)/xmlwf.pdb" /machine:I386 /out:"bin/xmlwf.exe"
LINK32_OBJS= \
+ "$(INTDIR)\hashtable.obj" \
"$(INTDIR)\wfcheck.obj" \
+ "$(INTDIR)\wfcheckmessage.obj" \
"$(INTDIR)\win32filemap.obj" \
"$(INTDIR)\xmlwf.obj" \
".\Release\xmltok.lib"
ALL : "xmltok - Win32 Debug" ".\Debug\xmlwf.exe"
CLEAN :
+ -@erase "$(INTDIR)\hashtable.obj"
-@erase "$(INTDIR)\vc40.idb"
-@erase "$(INTDIR)\vc40.pdb"
-@erase "$(INTDIR)\wfcheck.obj"
+ -@erase "$(INTDIR)\wfcheckmessage.obj"
-@erase "$(INTDIR)\win32filemap.obj"
-@erase "$(INTDIR)\xmlwf.obj"
-@erase "$(OUTDIR)\xmlwf.pdb"
CPP=cl.exe
# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c
-# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /I "." /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c
-CPP_PROJ=/nologo /MLd /W3 /Gm /GX /Zi /Od /I "." /D "WIN32" /D "_DEBUG" /D\
- "_CONSOLE" /Fp"$(INTDIR)/xmlwf.pch" /YX /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c
+# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /I "." /D "_DEBUG" /D "WIN32" /D "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /YX /c
+CPP_PROJ=/nologo /MLd /W3 /Gm /GX /Zi /Od /I "." /D "_DEBUG" /D "WIN32" /D\
+ "_CONSOLE" /D XMLTOKAPI=__declspec(dllimport) /Fp"$(INTDIR)/xmlwf.pch" /YX\
+ /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c
CPP_OBJS=.\xmlwf\Debug/
CPP_SBRS=.\.
odbc32.lib odbccp32.lib /nologo /subsystem:console /incremental:yes\
/pdb:"$(OUTDIR)/xmlwf.pdb" /debug /machine:I386 /out:"Debug/xmlwf.exe"
LINK32_OBJS= \
+ "$(INTDIR)\hashtable.obj" \
"$(INTDIR)\wfcheck.obj" \
+ "$(INTDIR)\wfcheckmessage.obj" \
"$(INTDIR)\win32filemap.obj" \
"$(INTDIR)\xmlwf.obj" \
".\Debug\xmltok.lib"
$(LINK32_FLAGS) $(LINK32_OBJS)
<<
+!ELSEIF "$(CFG)" == "gennmtab - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "gennmtab\Release"
+# PROP BASE Intermediate_Dir "gennmtab\Release"
+# PROP BASE Target_Dir "gennmtab"
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "gennmtab\Release"
+# PROP Intermediate_Dir "gennmtab\Release"
+# PROP Target_Dir "gennmtab"
+OUTDIR=.\gennmtab\Release
+INTDIR=.\gennmtab\Release
+
+ALL : "$(OUTDIR)\gennmtab.exe"
+
+CLEAN :
+ -@erase "$(INTDIR)\gennmtab.obj"
+ -@erase "$(OUTDIR)\gennmtab.exe"
+
+"$(OUTDIR)" :
+ if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)"
+
+CPP=cl.exe
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
+# ADD CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /YX /c
+CPP_PROJ=/nologo /ML /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE"\
+ /Fp"$(INTDIR)/gennmtab.pch" /YX /Fo"$(INTDIR)/" /c
+CPP_OBJS=.\gennmtab\Release/
+CPP_SBRS=.\.
+
+.c{$(CPP_OBJS)}.obj:
+ $(CPP) $(CPP_PROJ) $<
+
+.cpp{$(CPP_OBJS)}.obj:
+ $(CPP) $(CPP_PROJ) $<
+
+.cxx{$(CPP_OBJS)}.obj:
+ $(CPP) $(CPP_PROJ) $<
+
+.c{$(CPP_SBRS)}.sbr:
+ $(CPP) $(CPP_PROJ) $<
+
+.cpp{$(CPP_SBRS)}.sbr:
+ $(CPP) $(CPP_PROJ) $<
+
+.cxx{$(CPP_SBRS)}.sbr:
+ $(CPP) $(CPP_PROJ) $<
+
+RSC=rc.exe
+# ADD BASE RSC /l 0x809 /d "NDEBUG"
+# ADD RSC /l 0x809 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+BSC32_FLAGS=/nologo /o"$(OUTDIR)/gennmtab.bsc"
+BSC32_SBRS= \
+
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+LINK32_FLAGS=kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib\
+ advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib\
+ odbccp32.lib /nologo /subsystem:console /incremental:no\
+ /pdb:"$(OUTDIR)/gennmtab.pdb" /machine:I386 /out:"$(OUTDIR)/gennmtab.exe"
+LINK32_OBJS= \
+ "$(INTDIR)\gennmtab.obj"
+
+"$(OUTDIR)\gennmtab.exe" : "$(OUTDIR)" $(DEF_FILE) $(LINK32_OBJS)
+ $(LINK32) @<<
+ $(LINK32_FLAGS) $(LINK32_OBJS)
+<<
+
+!ELSEIF "$(CFG)" == "gennmtab - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "gennmtab\Debug"
+# PROP BASE Intermediate_Dir "gennmtab\Debug"
+# PROP BASE Target_Dir "gennmtab"
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "gennmtab\Debug"
+# PROP Intermediate_Dir "gennmtab\Debug"
+# PROP Target_Dir "gennmtab"
+OUTDIR=.\gennmtab\Debug
+INTDIR=.\gennmtab\Debug
+
+ALL : "$(OUTDIR)\gennmtab.exe"
+
+CLEAN :
+ -@erase "$(INTDIR)\gennmtab.obj"
+ -@erase "$(INTDIR)\vc40.idb"
+ -@erase "$(INTDIR)\vc40.pdb"
+ -@erase "$(OUTDIR)\gennmtab.exe"
+ -@erase "$(OUTDIR)\gennmtab.ilk"
+ -@erase "$(OUTDIR)\gennmtab.pdb"
+
+"$(OUTDIR)" :
+ if not exist "$(OUTDIR)/$(NULL)" mkdir "$(OUTDIR)"
+
+CPP=cl.exe
+# ADD BASE CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c
+# ADD CPP /nologo /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /YX /c
+CPP_PROJ=/nologo /MLd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE"\
+ /Fp"$(INTDIR)/gennmtab.pch" /YX /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c
+CPP_OBJS=.\gennmtab\Debug/
+CPP_SBRS=.\.
+
+.c{$(CPP_OBJS)}.obj:
+ $(CPP) $(CPP_PROJ) $<
+
+.cpp{$(CPP_OBJS)}.obj:
+ $(CPP) $(CPP_PROJ) $<
+
+.cxx{$(CPP_OBJS)}.obj:
+ $(CPP) $(CPP_PROJ) $<
+
+.c{$(CPP_SBRS)}.sbr:
+ $(CPP) $(CPP_PROJ) $<
+
+.cpp{$(CPP_SBRS)}.sbr:
+ $(CPP) $(CPP_PROJ) $<
+
+.cxx{$(CPP_SBRS)}.sbr:
+ $(CPP) $(CPP_PROJ) $<
+
+RSC=rc.exe
+# ADD BASE RSC /l 0x809 /d "_DEBUG"
+# ADD RSC /l 0x809 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+BSC32_FLAGS=/nologo /o"$(OUTDIR)/gennmtab.bsc"
+BSC32_SBRS= \
+
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386
+# ADD LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386
+LINK32_FLAGS=kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib\
+ advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib\
+ odbccp32.lib /nologo /subsystem:console /incremental:yes\
+ /pdb:"$(OUTDIR)/gennmtab.pdb" /debug /machine:I386\
+ /out:"$(OUTDIR)/gennmtab.exe"
+LINK32_OBJS= \
+ "$(INTDIR)\gennmtab.obj"
+
+"$(OUTDIR)\gennmtab.exe" : "$(OUTDIR)" $(DEF_FILE) $(LINK32_OBJS)
+ $(LINK32) @<<
+ $(LINK32_FLAGS) $(LINK32_OBJS)
+<<
+
!ENDIF
################################################################################
DEP_CPP_XMLTO=\
".\asciitab.h"\
+ ".\iasciitab.h"\
".\latin1tab.h"\
".\nametab.h"\
".\utf8tab.h"\
# ADD CPP /Ob2
-"$(INTDIR)\xmltok.obj" : $(SOURCE) $(DEP_CPP_XMLTO) "$(INTDIR)"
- $(CPP) /nologo /MT /W3 /GX /O2 /Ob2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS"\
- /Fp"$(INTDIR)/xmltok.pch" /YX /Fo"$(INTDIR)/" /c $(SOURCE)
+"$(INTDIR)\xmltok.obj" : $(SOURCE) $(DEP_CPP_XMLTO) "$(INTDIR)" ".\nametab.h"
+ $(CPP) /nologo /MT /W3 /GX /O2 /Ob2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D\
+ XMLTOKAPI=__declspec(dllexport) /Fp"$(INTDIR)/xmltok.pch" /YX /Fo"$(INTDIR)/"\
+ /c $(SOURCE)
!ELSEIF "$(CFG)" == "xmltok - Win32 Debug"
DEP_CPP_XMLTO=\
".\asciitab.h"\
+ ".\iasciitab.h"\
".\latin1tab.h"\
".\nametab.h"\
".\utf8tab.h"\
".\xmltok_impl.h"\
-"$(INTDIR)\xmltok.obj" : $(SOURCE) $(DEP_CPP_XMLTO) "$(INTDIR)"
- $(CPP) /nologo /MTd /W3 /Gm /GX /Zi /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS"\
- /Fp"$(INTDIR)/xmltok.pch" /YX /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c $(SOURCE)
+"$(INTDIR)\xmltok.obj" : $(SOURCE) $(DEP_CPP_XMLTO) "$(INTDIR)" ".\nametab.h"
+ $(CPP) /nologo /MTd /W3 /Gm /GX /Zi /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS"\
+ /D XMLTOKAPI=__declspec(dllexport) /Fp"$(INTDIR)/xmltok.pch" /YX\
+ /Fo"$(INTDIR)/" /Fd"$(INTDIR)/" /c $(SOURCE)
!ENDIF
SOURCE=.\dllmain.c
-!IF "$(CFG)" == "xmltok - Win32 Release"
-
-
"$(INTDIR)\dllmain.obj" : $(SOURCE) "$(INTDIR)"
-!ELSEIF "$(CFG)" == "xmltok - Win32 Debug"
-
-
-"$(INTDIR)\dllmain.obj" : $(SOURCE) "$(INTDIR)"
-
-
-!ENDIF
-
# End Source File
################################################################################
# Begin Source File
"$(INTDIR)\xmlrole.obj" : $(SOURCE) $(DEP_CPP_XMLRO) "$(INTDIR)"
+!ENDIF
+
+# End Source File
+################################################################################
+# Begin Source File
+
+SOURCE=.\gennmtab\Release\gennmtab.exe
+
+!IF "$(CFG)" == "xmltok - Win32 Release"
+
+# Begin Custom Build - Generating nametab.h
+InputPath=.\gennmtab\Release\gennmtab.exe
+
+"nametab.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+ $(InputPath) >nametab.h
+
+# End Custom Build
+
+!ELSEIF "$(CFG)" == "xmltok - Win32 Debug"
+
+# PROP Exclude_From_Build 1
+
+!ENDIF
+
+# End Source File
+################################################################################
+# Begin Project Dependency
+
+# Project_Dep_Name "gennmtab"
+
+!IF "$(CFG)" == "xmltok - Win32 Release"
+
+"gennmtab - Win32 Release" :
+ $(MAKE) /$(MAKEFLAGS) /F ".\xmltok.mak" CFG="gennmtab - Win32 Release"
+
+!ELSEIF "$(CFG)" == "xmltok - Win32 Debug"
+
+"gennmtab - Win32 Debug" :
+ $(MAKE) /$(MAKEFLAGS) /F ".\xmltok.mak" CFG="gennmtab - Win32 Debug"
+
+!ENDIF
+
+# End Project Dependency
+################################################################################
+# Begin Source File
+
+SOURCE=.\gennmtab\Debug\gennmtab.exe
+
+!IF "$(CFG)" == "xmltok - Win32 Release"
+
+# PROP Exclude_From_Build 1
+
+!ELSEIF "$(CFG)" == "xmltok - Win32 Debug"
+
+# Begin Custom Build - Generating nametab.h
+InputPath=.\gennmtab\Debug\gennmtab.exe
+
+"nametab.h" : $(SOURCE) "$(INTDIR)" "$(OUTDIR)"
+ $(InputPath) >nametab.h
+
+# End Custom Build
+
!ENDIF
# End Source File
SOURCE=.\xmlec\xmlec.c
DEP_CPP_XMLEC=\
".\xmltok.h"\
+ {$(INCLUDE)}"\sys\TYPES.H"\
"$(INTDIR)\xmlec.obj" : $(SOURCE) $(DEP_CPP_XMLEC) "$(INTDIR)"
# Begin Source File
SOURCE=.\xmlwf\wfcheck.c
+
+!IF "$(CFG)" == "xmlwf - Win32 Release"
+
DEP_CPP_WFCHE=\
".\xmlrole.h"\
".\xmltok.h"\
+ ".\xmlwf\hashtable.h"\
".\xmlwf\wfcheck.h"\
$(CPP) $(CPP_PROJ) $(SOURCE)
+!ELSEIF "$(CFG)" == "xmlwf - Win32 Debug"
+
+DEP_CPP_WFCHE=\
+ ".\xmlrole.h"\
+ ".\xmltok.h"\
+ ".\xmlwf\hashtable.h"\
+ ".\xmlwf\wfcheck.h"\
+
+
+"$(INTDIR)\wfcheck.obj" : $(SOURCE) $(DEP_CPP_WFCHE) "$(INTDIR)"
+ $(CPP) $(CPP_PROJ) $(SOURCE)
+
+
+!ENDIF
+
# End Source File
################################################################################
# Begin Source File
SOURCE=.\xmlwf\xmlwf.c
+
+!IF "$(CFG)" == "xmlwf - Win32 Release"
+
DEP_CPP_XMLWF=\
".\xmlwf\filemap.h"\
".\xmlwf\wfcheck.h"\
$(CPP) $(CPP_PROJ) $(SOURCE)
+!ELSEIF "$(CFG)" == "xmlwf - Win32 Debug"
+
+DEP_CPP_XMLWF=\
+ ".\xmlwf\filemap.h"\
+ ".\xmlwf\wfcheck.h"\
+
+
+"$(INTDIR)\xmlwf.obj" : $(SOURCE) $(DEP_CPP_XMLWF) "$(INTDIR)"
+ $(CPP) $(CPP_PROJ) $(SOURCE)
+
+
+!ENDIF
+
# End Source File
################################################################################
# Begin Source File
$(CPP) $(CPP_PROJ) $(SOURCE)
+# End Source File
+################################################################################
+# Begin Source File
+
+SOURCE=.\xmlwf\hashtable.c
+DEP_CPP_HASHT=\
+ ".\xmlwf\hashtable.h"\
+
+
+"$(INTDIR)\hashtable.obj" : $(SOURCE) $(DEP_CPP_HASHT) "$(INTDIR)"
+ $(CPP) $(CPP_PROJ) $(SOURCE)
+
+
+# End Source File
+################################################################################
+# Begin Source File
+
+SOURCE=.\xmlwf\unixfilemap.c
+DEP_CPP_UNIXF=\
+ ".\xmlwf\filemap.h"\
+ {$(INCLUDE)}"\sys\stat.h"\
+ {$(INCLUDE)}"\sys\TYPES.H"\
+
+# PROP Exclude_From_Build 1
+# End Source File
+################################################################################
+# Begin Source File
+
+SOURCE=.\xmlwf\readfilemap.c
+DEP_CPP_READF=\
+ {$(INCLUDE)}"\sys\stat.h"\
+ {$(INCLUDE)}"\sys\TYPES.H"\
+
+# PROP Exclude_From_Build 1
+# End Source File
+################################################################################
+# Begin Source File
+
+SOURCE=.\xmlwf\wfcheckmessage.c
+DEP_CPP_WFCHEC=\
+ ".\xmlwf\wfcheck.h"\
+
+
+"$(INTDIR)\wfcheckmessage.obj" : $(SOURCE) $(DEP_CPP_WFCHEC) "$(INTDIR)"
+ $(CPP) $(CPP_PROJ) $(SOURCE)
+
+
+# End Source File
+# End Target
+################################################################################
+# Begin Target
+
+# Name "gennmtab - Win32 Release"
+# Name "gennmtab - Win32 Debug"
+
+!IF "$(CFG)" == "gennmtab - Win32 Release"
+
+!ELSEIF "$(CFG)" == "gennmtab - Win32 Debug"
+
+!ENDIF
+
+################################################################################
+# Begin Source File
+
+SOURCE=.\gennmtab\gennmtab.c
+
+!IF "$(CFG)" == "gennmtab - Win32 Release"
+
+
+"$(INTDIR)\gennmtab.obj" : $(SOURCE) "$(INTDIR)"
+ $(CPP) $(CPP_PROJ) $(SOURCE)
+
+
+!ELSEIF "$(CFG)" == "gennmtab - Win32 Debug"
+
+
+"$(INTDIR)\gennmtab.obj" : $(SOURCE) "$(INTDIR)"
+ $(CPP) $(CPP_PROJ) $(SOURCE)
+
+
+!ENDIF
+
# End Source File
# End Target
# End Project
/* 0xEC */ BT_LEAD3, BT_LEAD3, BT_LEAD3, BT_LEAD3,
/* 0xF0 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4,
/* 0xF4 */ BT_LEAD4, BT_LEAD4, BT_LEAD4, BT_LEAD4,
-/* 0xF8 */ BT_LEAD5, BT_LEAD5, BT_LEAD5, BT_LEAD5,
-/* 0xFC */ BT_LEAD6, BT_LEAD6, BT_MALFORM, BT_MALFORM,
+/* 0xF8 */ BT_NONXML, BT_NONXML, BT_NONXML, BT_NONXML,
+/* 0xFC */ BT_NONXML, BT_NONXML, BT_MALFORM, BT_MALFORM,
-#ifdef _MSC_VER
-#define XMLTOKAPI __declspec(dllexport)
-#endif
#include "xmlrole.h"
/* Doesn't check:
that ,| are not mixed in a model group
content of literals
-Separate handler for external internalSubsets
-
-Level 0 == outside DTD
-Level 1 == in DTD internalSubset
-Level 2 == in Declaration
-Level 3 == in Group
*/
-PROLOG_HANDLER
- prolog0, prolog1,
+typedef int PROLOG_HANDLER(struct prolog_state *state,
+ int tok,
+ const char *ptr,
+ const char *end,
+ const ENCODING *enc);
+
+static PROLOG_HANDLER
+ prolog0, prolog1, prolog2,
doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
internalSubset,
entity0, entity1, entity2, entity3, entity4, entity5, entity6,
- notation0, notation1, notation2, notation3,
+ entity7, entity8, entity9,
+ notation0, notation1, notation2, notation3, notation4,
attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
attlist7, attlist8, attlist9,
element0, element1, element2, element3, element4, element5, element6,
declClose,
error;
-PROLOG_HANDLER ignore, declParamEntityRef;
-
-PROLOG_HANDLER declParamEntityRef;
-
static
int syntaxError(PROLOG_STATE *);
const ENCODING *enc)
{
switch (tok) {
- case XML_TOK_BOM:
case XML_TOK_PI:
+ state->handler = prolog1;
+ if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml"))
+ return XML_ROLE_XML_DECL;
+ return XML_ROLE_NONE;
case XML_TOK_COMMENT:
+ state->handler = prolog1;
+ case XML_TOK_BOM:
return XML_ROLE_NONE;
case XML_TOK_DECL_OPEN:
if (!XmlNameMatchesAscii(enc,
break;
state->handler = doctype0;
return XML_ROLE_NONE;
- case XML_TOK_START_TAG_WITH_ATTS:
- case XML_TOK_START_TAG_NO_ATTS:
- case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
- case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
+ case XML_TOK_INSTANCE_START:
state->handler = error;
return XML_ROLE_INSTANCE_START;
}
{
switch (tok) {
case XML_TOK_PI:
+ if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml"))
+ return syntaxError(state);
+ case XML_TOK_COMMENT:
+ case XML_TOK_BOM:
+ return XML_ROLE_NONE;
+ case XML_TOK_DECL_OPEN:
+ if (!XmlNameMatchesAscii(enc,
+ ptr + 2 * enc->minBytesPerChar,
+ "DOCTYPE"))
+ break;
+ state->handler = doctype0;
+ return XML_ROLE_NONE;
+ case XML_TOK_INSTANCE_START:
+ state->handler = error;
+ return XML_ROLE_INSTANCE_START;
+ }
+ return syntaxError(state);
+}
+
+/* Prolog handler installed once the DOCTYPE declaration has been closed
+   (the doctype handlers switch to it on XML_TOK_DECL_CLOSE).
+   Processing instructions whose target is not "xml" and comments yield
+   XML_ROLE_NONE; the start of the instance switches to the error handler
+   and yields XML_ROLE_INSTANCE_START; every other token is a syntax error. */
+static
+int prolog2(PROLOG_STATE *state,
+ int tok,
+ const char *ptr,
+ const char *end,
+ const ENCODING *enc)
+{
+ switch (tok) {
+ case XML_TOK_PI:
+ if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml"))
+ return syntaxError(state);
+ /* fall through: any other processing instruction is treated like a comment */
case XML_TOK_COMMENT:
return XML_ROLE_NONE;
- case XML_TOK_START_TAG_WITH_ATTS:
- case XML_TOK_START_TAG_NO_ATTS:
- case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
- case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
+ case XML_TOK_INSTANCE_START:
state->handler = error;
return XML_ROLE_INSTANCE_START;
}
return syntaxError(state);
}
+static
int doctype1(PROLOG_STATE *state,
int tok,
const char *ptr,
state->handler = internalSubset;
return XML_ROLE_NONE;
case XML_TOK_DECL_CLOSE:
- state->handler = prolog1;
+ state->handler = prolog2;
return XML_ROLE_DOCTYPE_CLOSE;
case XML_TOK_NAME:
if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) {
return syntaxError(state);
}
+static
int doctype2(PROLOG_STATE *state,
int tok,
const char *ptr,
return syntaxError(state);
}
+static
int doctype3(PROLOG_STATE *state,
int tok,
const char *ptr,
return syntaxError(state);
}
+static
int doctype4(PROLOG_STATE *state,
int tok,
const char *ptr,
state->handler = internalSubset;
return XML_ROLE_NONE;
case XML_TOK_DECL_CLOSE:
- state->handler = prolog1;
+ state->handler = prolog2;
return XML_ROLE_DOCTYPE_CLOSE;
}
return syntaxError(state);
}
+static
int doctype5(PROLOG_STATE *state,
int tok,
const char *ptr,
{
switch (tok) {
case XML_TOK_DECL_CLOSE:
- state->handler = prolog1;
+ state->handler = prolog2;
return XML_ROLE_DOCTYPE_CLOSE;
}
return syntaxError(state);
static
int internalSubset(PROLOG_STATE *state,
- int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc)
+ int tok,
+ const char *ptr,
+ const char *end,
+ const ENCODING *enc)
{
switch (tok) {
case XML_TOK_DECL_OPEN:
}
if (XmlNameMatchesAscii(enc,
ptr + 2 * enc->minBytesPerChar,
- "ELEMENT")) {
- state->handler = element0;
+ "ATTLIST")) {
+ state->handler = attlist0;
return XML_ROLE_NONE;
}
if (XmlNameMatchesAscii(enc,
ptr + 2 * enc->minBytesPerChar,
- "ATTLIST")) {
- state->handler = attlist0;
+ "ELEMENT")) {
+ state->handler = element0;
return XML_ROLE_NONE;
}
if (XmlNameMatchesAscii(enc,
}
break;
case XML_TOK_PI:
+ if (XmlNameMatchesAscii(enc, ptr + 2 * enc->minBytesPerChar, "xml"))
+ return syntaxError(state);
case XML_TOK_COMMENT:
case XML_TOK_PARAM_ENTITY_REF:
return XML_ROLE_NONE;
{
switch (tok) {
case XML_TOK_NAME:
- state->handler = entity2;
+ state->handler = entity7;
return XML_ROLE_PARAM_ENTITY_NAME;
}
return syntaxError(state);
return syntaxError(state);
}
+/* Entity-declaration state that accepts either a literal entity value or
+   the keywords SYSTEM / PUBLIC introducing an external identifier.
+   - literal        -> declClose, XML_ROLE_ENTITY_VALUE
+   - name "SYSTEM"  -> entity9,   XML_ROLE_NONE
+   - name "PUBLIC"  -> entity8,   XML_ROLE_NONE
+   Anything else is reported through syntaxError(). */
+static
+int entity7(PROLOG_STATE *state,
+            int tok,
+            const char *ptr,
+            const char *end,
+            const ENCODING *enc)
+{
+  if (tok == XML_TOK_LITERAL) {
+    state->handler = declClose;
+    return XML_ROLE_ENTITY_VALUE;
+  }
+  if (tok == XML_TOK_NAME) {
+    if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) {
+      state->handler = entity9;
+      return XML_ROLE_NONE;
+    }
+    if (XmlNameMatchesAscii(enc, ptr, "PUBLIC")) {
+      state->handler = entity8;
+      return XML_ROLE_NONE;
+    }
+  }
+  return syntaxError(state);
+}
+
+/* Entity-declaration state entered after the PUBLIC keyword: the only
+   acceptable token is the literal holding the public identifier, after
+   which entity9 takes over.  Any other token is a syntax error. */
+static
+int entity8(PROLOG_STATE *state,
+            int tok,
+            const char *ptr,
+            const char *end,
+            const ENCODING *enc)
+{
+  if (tok != XML_TOK_LITERAL)
+    return syntaxError(state);
+  state->handler = entity9;
+  return XML_ROLE_ENTITY_PUBLIC_ID;
+}
+
+/* Entity-declaration state expecting the system-identifier literal; once
+   it has been seen the declaration must be closed (declClose).  Any other
+   token is a syntax error. */
+static
+int entity9(PROLOG_STATE *state,
+            int tok,
+            const char *ptr,
+            const char *end,
+            const ENCODING *enc)
+{
+  if (tok != XML_TOK_LITERAL)
+    return syntaxError(state);
+  state->handler = declClose;
+  return XML_ROLE_ENTITY_SYSTEM_ID;
+}
+
static
int notation0(PROLOG_STATE *state,
int tok,
{
switch (tok) {
case XML_TOK_LITERAL:
- state->handler = notation3;
+ state->handler = notation4;
return XML_ROLE_NOTATION_PUBLIC_ID;
}
return syntaxError(state);
return syntaxError(state);
}
+/* Notation-declaration state that follows the public-identifier literal.
+   A further literal is taken as the system identifier (then declClose);
+   a declaration close ends the notation right away and returns to
+   internalSubset.  Any other token is a syntax error. */
+static
+int notation4(PROLOG_STATE *state,
+              int tok,
+              const char *ptr,
+              const char *end,
+              const ENCODING *enc)
+{
+  if (tok == XML_TOK_LITERAL) {
+    state->handler = declClose;
+    return XML_ROLE_NOTATION_SYSTEM_ID;
+  }
+  if (tok == XML_TOK_DECL_CLOSE) {
+    state->handler = internalSubset;
+    return XML_ROLE_NONE;
+  }
+  return syntaxError(state);
+}
+
static
int attlist0(PROLOG_STATE *state,
int tok,
"NMTOKENS",
};
int i;
- for (i = 0; i < sizeof(types)/sizeof(types[0]); i++)
+ for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
if (XmlNameMatchesAscii(enc, ptr, types[i])) {
state->handler = attlist8;
return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
break;
case XML_TOK_OPEN_PAREN:
state->handler = element2;
- return XML_ROLE_CONTENT_GROUP_OPEN;
+ state->level = 1;
+ return XML_ROLE_GROUP_OPEN;
}
return syntaxError(state);
}
case XML_TOK_OPEN_PAREN:
state->level = 2;
state->handler = element6;
- return XML_ROLE_CONTENT_GROUP_OPEN;
+ return XML_ROLE_GROUP_OPEN;
case XML_TOK_NAME:
- state->level = 1;
state->handler = element7;
return XML_ROLE_CONTENT_ELEMENT;
case XML_TOK_NAME_QUESTION:
- state->level = 1;
state->handler = element7;
return XML_ROLE_CONTENT_ELEMENT_OPT;
case XML_TOK_NAME_ASTERISK:
- state->level = 1;
state->handler = element7;
return XML_ROLE_CONTENT_ELEMENT_REP;
case XML_TOK_NAME_PLUS:
- state->level = 1;
state->handler = element7;
return XML_ROLE_CONTENT_ELEMENT_PLUS;
}
case XML_TOK_CLOSE_PAREN:
case XML_TOK_CLOSE_PAREN_ASTERISK:
state->handler = declClose;
- return XML_ROLE_END_GROUP_REP;
+ return XML_ROLE_GROUP_CLOSE_REP;
case XML_TOK_OR:
state->handler = element4;
return XML_ROLE_NONE;
switch (tok) {
case XML_TOK_CLOSE_PAREN_ASTERISK:
state->handler = declClose;
- return XML_ROLE_END_GROUP_REP;
+ return XML_ROLE_GROUP_CLOSE_REP;
case XML_TOK_OR:
state->handler = element4;
return XML_ROLE_NONE;
switch (tok) {
case XML_TOK_OPEN_PAREN:
state->level += 1;
- return XML_ROLE_CONTENT_GROUP_OPEN;
+ return XML_ROLE_GROUP_OPEN;
case XML_TOK_NAME:
state->handler = element7;
return XML_ROLE_CONTENT_ELEMENT;
state->level -= 1;
if (state->level == 0)
state->handler = declClose;
- return XML_ROLE_END_GROUP;
+ return XML_ROLE_GROUP_CLOSE;
case XML_TOK_CLOSE_PAREN_ASTERISK:
state->level -= 1;
if (state->level == 0)
state->handler = declClose;
- return XML_ROLE_END_GROUP_REP;
+ return XML_ROLE_GROUP_CLOSE_REP;
case XML_TOK_CLOSE_PAREN_QUESTION:
state->level -= 1;
if (state->level == 0)
state->handler = declClose;
- return XML_ROLE_END_GROUP_OPT;
+ return XML_ROLE_GROUP_CLOSE_OPT;
case XML_TOK_CLOSE_PAREN_PLUS:
state->level -= 1;
if (state->level == 0)
state->handler = declClose;
- return XML_ROLE_END_GROUP_PLUS;
+ return XML_ROLE_GROUP_CLOSE_PLUS;
case XML_TOK_COMMA:
state->handler = element6;
return XML_ROLE_GROUP_SEQUENCE;
return syntaxError(state);
}
+#if 0
+
static
int ignore(PROLOG_STATE *state,
int tok,
}
return syntaxError(state);
}
+#endif
static
int error(PROLOG_STATE *state,
enum {
XML_ROLE_ERROR = -1,
XML_ROLE_NONE = 0,
+ XML_ROLE_XML_DECL,
+ XML_ROLE_INSTANCE_START,
XML_ROLE_DOCTYPE_NAME,
XML_ROLE_DOCTYPE_SYSTEM_ID,
XML_ROLE_DOCTYPE_PUBLIC_ID,
XML_ROLE_CONTENT_ANY,
XML_ROLE_CONTENT_EMPTY,
XML_ROLE_CONTENT_PCDATA,
- XML_ROLE_CONTENT_GROUP_OPEN,
- XML_ROLE_END_GROUP,
- XML_ROLE_END_GROUP_REP,
- XML_ROLE_END_GROUP_OPT,
- XML_ROLE_END_GROUP_PLUS,
+ XML_ROLE_GROUP_OPEN,
+ XML_ROLE_GROUP_CLOSE,
+ XML_ROLE_GROUP_CLOSE_REP,
+ XML_ROLE_GROUP_CLOSE_OPT,
+ XML_ROLE_GROUP_CLOSE_PLUS,
XML_ROLE_GROUP_CHOICE,
XML_ROLE_GROUP_SEQUENCE,
XML_ROLE_CONTENT_ELEMENT,
XML_ROLE_CONTENT_ELEMENT_PLUS
};
-typedef int PROLOG_HANDLER(struct prolog_state *state,
- int tok,
- const char *ptr,
- const char *end,
- const ENCODING *enc);
-
+/* State of the prolog parser.
+   handler is the function that processes the next token; the handlers
+   replace it to move between states.
+   level counts open groups while parsing an element content model: it is
+   raised on an opening parenthesis, lowered on a closing one, and reaching
+   zero switches to the declaration-close state. */
typedef struct prolog_state {
- PROLOG_HANDLER *handler;
+ int (*handler)(struct prolog_state *state,
+ int tok,
+ const char *ptr,
+ const char *end,
+ const ENCODING *enc);
unsigned level;
} PROLOG_STATE;
-#ifdef _MSC_VER
-#define XMLTOKAPI __declspec(dllexport)
-#endif
-
#include "xmltok.h"
#include "nametab.h"
+/* Initializer fragments for the function-pointer part of an encoding.
+   PREFIX must be defined where these are expanded; it selects the family
+   of functions being referenced.
+   VTABLE1 lists the scanning/name/position helpers, VTABLE2 the encode and
+   toUtf8 converters, and VTABLE is both in order.  An encoding that supplies
+   its own converters uses VTABLE1 followed by explicit function names. */
+#define VTABLE1 \
+ { PREFIX(prologTok), PREFIX(contentTok) }, \
+ { PREFIX(attributeValueTok), PREFIX(entityValueTok) }, \
+ PREFIX(sameName), \
+ PREFIX(nameMatchesAscii), \
+ PREFIX(nameLength), \
+ PREFIX(getAtts), \
+ PREFIX(charRefNumber), \
+ PREFIX(updatePosition), \
+ PREFIX(isPublicId), \
+ PREFIX(isSystemId)
+
+#define VTABLE2 \
+ PREFIX(encode), \
+ { PREFIX(toUtf8) }
+
+#define VTABLE VTABLE1, VTABLE2
+
#define UCS2_GET_NAMING(pages, hi, lo) \
(namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F)))
? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
: 0))
-
-#include "xmltok_impl.h"
-
struct normal_encoding {
ENCODING enc;
unsigned char type[256];
};
+static const struct normal_encoding latin1_encoding;
+
+#define latin1tab (latin1_encoding.type)
+
+#include "xmltok_impl.h"
+
/* minimum bytes per character */
#define MINBPC 1
#define BYTE_TYPE(enc, p) \
(((struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
+/* Value of the byte at p; the argument is parenthesized so that pointer
+   expressions such as `ptr + 1` expand correctly. */
+#define BYTE_TO_ASCII(enc, p) (*(p))
#define IS_NAME_CHAR(enc, p, n) UTF8_GET_NAMING(namePages, p, n)
#define IS_NMSTRT_CHAR(enc, p, n) UTF8_GET_NAMING(nmstrtPages, p, n)
#undef MINBPC
#undef BYTE_TYPE
+#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NMSTRT_CHAR
-const struct normal_encoding utf8_encoding = {
- { { PREFIX(prologTok), PREFIX(contentTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii), PREFIX(getAtts), PREFIX(updatePosition), 1 },
+/* Constants used when producing UTF-8 byte sequences. */
+enum {
+  /* cvalN is value of masked first byte of N byte sequence */
+  cval1 = 0x00,
+  cval2 = 0xc0,
+  cval3 = 0xe0,
+  cval4 = 0xf0,
+  /* minN is minimum legal resulting value for N byte sequence */
+  min2 = 0x80,
+  min3 = 0x800,
+  min4 = 0x10000
+};
+
+/* Store the UTF-8 encoding of code point c in buf.
+   enc is not used.  buf must have room for up to four bytes.
+   Returns the number of bytes stored (1 to 4), or 0 without touching buf
+   when c is negative or not below 0x110000. */
+static
+int utf8_encode(const ENCODING *enc, int c, char *buf)
+{
+  if (c < 0)
+    return 0;
+  if (c < min2) {
+    buf[0] = (c | cval1);
+    return 1;
+  }
+  if (c < min3) {
+    buf[0] = ((c >> 6) | cval2);
+    buf[1] = ((c & 0x3f) | 0x80);
+    return 2;
+  }
+  if (c < min4) {
+    buf[0] = ((c >> 12) | cval3);
+    buf[1] = (((c >> 6) & 0x3f) | 0x80);
+    buf[2] = ((c & 0x3f) | 0x80);
+    return 3;
+  }
+  if (c < 0x110000) {
+    buf[0] = ((c >> 18) | cval4);
+    buf[1] = (((c >> 12) & 0x3f) | 0x80);
+    buf[2] = (((c >> 6) & 0x3f) | 0x80);
+    buf[3] = ((c & 0x3f) | 0x80);
+    /* Four bytes were stored; reporting 3 would drop the last one. */
+    return 4;
+  }
+  return 0;
+}
+
+/* Copy UTF-8 bytes from [*fromP, fromLim) to [*toP, toLim).
+   enc is not used.  When the output cannot hold all of the input, the copy
+   is shortened so that it ends on a character boundary: a multi-byte
+   character that does not fit completely is left in the input for the next
+   call.  On return *fromP and *toP point just past the bytes consumed and
+   produced. */
+static
+void utf8_toUtf8(const ENCODING *enc,
+                 const char **fromP, const char *fromLim,
+                 char **toP, const char *toLim)
+{
+  char *to;
+  const char *from;
+  if (fromLim - *fromP > toLim - *toP) {
+    /* Avoid copying partial characters. */
+    const char *cut = *fromP + (toLim - *toP);
+    const char *lead = cut;
+    /* Step back over trailing continuation bytes (10xxxxxx). */
+    while (lead > *fromP && ((unsigned char)lead[-1] & 0xc0) == 0x80)
+      lead--;
+    if (lead == *fromP)
+      fromLim = lead;
+    else {
+      /* lead[-1] starts the last character before the cut; work out how
+         many bytes that character needs. */
+      unsigned char c = (unsigned char)lead[-1];
+      int len = 1;
+      if ((c & 0xe0) == 0xc0)
+        len = 2;
+      else if ((c & 0xf0) == 0xe0)
+        len = 3;
+      else if ((c & 0xf8) == 0xf0)
+        len = 4;
+      lead--;
+      /* Keep the character only if all of its bytes lie before the cut;
+         otherwise stop in front of its lead byte. */
+      fromLim = (cut - lead < len) ? lead : cut;
+    }
+  }
+  for (to = *toP, from = *fromP; from != fromLim; from++, to++)
+    *to = *from;
+  *fromP = from;
+  *toP = to;
+}
+
+/* Encoding object for UTF-8: the shared function table with the UTF-8
+   specific encode/toUtf8 converters, followed by the 256-entry byte-type
+   table assembled from asciitab.h and utf8tab.h.
+   NOTE(review): the trailing 1 is presumably the minimum bytes per
+   character - confirm against the ENCODING definition in xmltok.h. */
+static const struct normal_encoding utf8_encoding = {
+ { VTABLE1, utf8_encode, { utf8_toUtf8 }, 1 },
+ {
#include "asciitab.h"
#include "utf8tab.h"
+ }
};
-#undef PREFIX
+/* Encoding object returned by XmlGetInternalEncoding for
+ XML_UTF8_ENCODING.  Same functions as utf8_encoding, but the first part
+ of its byte-type table comes from iasciitab.h instead of asciitab.h. */
+static const struct normal_encoding internal_utf8_encoding = {
+ { VTABLE1, utf8_encode, { utf8_toUtf8 }, 1 },
+ {
+#include "iasciitab.h"
+#include "utf8tab.h"
+ }
+};
+
+/* Store character number c as one byte in buf.  Returns 1 when c is in
+   the range 0..0xFF, otherwise stores nothing and returns 0.
+   enc is not used. */
+static
+int latin1_encode(const ENCODING *enc, int c, char *buf)
+{
+  if (c >= 0 && c <= 0xFF) {
+    buf[0] = (char)c;
+    return 1;
+  }
+  return 0;
+}
+
+/* Convert single-byte characters from *fromP (ending at fromLim) to UTF-8
+   at *toP (ending at toLim).  Bytes below 0x80 are copied unchanged;
+   bytes with the high bit set become a two-byte sequence.  Conversion
+   stops when the input is exhausted or the next character does not fit;
+   *fromP and *toP are left just past what was consumed and produced.
+   enc is not used. */
+static
+void latin1_toUtf8(const ENCODING *enc,
+                   const char **fromP, const char *fromLim,
+                   char **toP, const char *toLim)
+{
+  for (;;) {
+    unsigned char c;
+    if (*fromP == fromLim)
+      break;
+    c = (unsigned char)**fromP;
+    if (c & 0x80) {
+      if (toLim - *toP < 2)
+        break;
+      *(*toP)++ = ((c >> 6) | cval2);
+      *(*toP)++ = ((c & 0x3f) | 0x80);
+      /* consume the input byte; without this the same byte would be
+         converted again and again until the output is full */
+      (*fromP)++;
+    }
+    else {
+      if (*toP == toLim)
+        break;
+      *(*toP)++ = *(*fromP)++;
+    }
+  }
+}
-static unsigned char latin1tab[256] = {
+/* Encoding object for single-byte input handled by latin1_encode and
+ latin1_toUtf8.  Its byte-type table is the concatenation of asciitab.h
+ and latin1tab.h. */
+static const struct normal_encoding latin1_encoding = {
+ { VTABLE1, latin1_encode, { latin1_toUtf8 }, 1 },
+ {
#include "asciitab.h"
#include "latin1tab.h"
+ }
};
+#define latin1tab (latin1_encoding.type)
+
+#undef PREFIX
+
static int unicode_byte_type(char hi, char lo)
{
switch ((unsigned char)hi) {
return BT_NONASCII;
}
+/* Expands to the definition of PREFIX(encode), which stores charNum as
+ UTF-16 in buf using the SET2 macro in effect where this is expanded.
+ The generated function returns the number of bytes stored: 2 for values
+ below 0x10000, 4 (two 16-bit units built from 0xD800 and 0xDC00) for
+ values below 0x110000, and 0 when charNum is negative or 0x110000 or
+ greater. */
+#define DEFINE_UTF16_ENCODE \
+static \
+int PREFIX(encode)(const ENCODING *enc, int charNum, char *buf) \
+{ \
+ if (charNum < 0) \
+ return 0; \
+ if (charNum < 0x10000) { \
+ SET2(buf, charNum); \
+ return 2; \
+ } \
+ if (charNum < 0x110000) { \
+ charNum -= 0x10000; \
+ SET2(buf, (charNum >> 10) + 0xD800); \
+ SET2(buf + 2, (charNum & 0x3FF) + 0xDC00); \
+ return 4; \
+ } \
+ return 0; \
+}
+
+/* Expands to the definition of PREFIX(toUtf8), which converts 16-bit
+   units from *fromP (ending at fromLim) to UTF-8 at *toP (ending at
+   toLim), reading each unit with the GET_LO and GET_HI macros in effect
+   where this is expanded.  Conversion stops when the input is exhausted,
+   when the next character does not fit in the output, or when a high
+   surrogate is not followed by another complete unit.  On return *fromP
+   and *toP point just past what was consumed and produced. */
+#define DEFINE_UTF16_TO_UTF8 \
+static \
+void PREFIX(toUtf8)(const ENCODING *enc, \
+                    const char **fromP, const char *fromLim, \
+                    char **toP, const char *toLim) \
+{ \
+  const char *from; \
+  for (from = *fromP; from != fromLim; from += 2) { \
+    int plane; \
+    unsigned char lo2; \
+    unsigned char lo = GET_LO(from); \
+    unsigned char hi = GET_HI(from); \
+    switch (hi) { \
+    case 0: \
+      if (lo < 0x80) { \
+        if (*toP == toLim) { \
+          *fromP = from; \
+          return; \
+        } \
+        *(*toP)++ = lo; \
+        break; \
+      } \
+      /* 0x80..0xFF need two bytes in UTF-8: fall through */ \
+    case 0x1: case 0x2: case 0x3: \
+    case 0x4: case 0x5: case 0x6: case 0x7: \
+      if (toLim - *toP < 2) { \
+        *fromP = from; \
+        return; \
+      } \
+      *(*toP)++ = ((lo >> 6) | (hi << 2) | cval2); \
+      *(*toP)++ = ((lo & 0x3f) | 0x80); \
+      break; \
+    default: \
+      if (toLim - *toP < 3) { \
+        *fromP = from; \
+        return; \
+      } \
+      /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
+      *(*toP)++ = ((hi >> 4) | cval3); \
+      *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
+      *(*toP)++ = ((lo & 0x3f) | 0x80); \
+      break; \
+    case 0xD8: case 0xD9: case 0xDA: case 0xDB: \
+      /* need the second unit of the pair and 4 bytes of output */ \
+      if (fromLim - from < 4 || toLim - *toP < 4) { \
+        *fromP = from; \
+        return; \
+      } \
+      /* The top 4 payload bits of the first unit, plus 1, give bits */ \
+      /* 16..20 of the character number (the pair encodes the number */ \
+      /* minus 0x10000). */ \
+      plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
+      *(*toP)++ = ((plane >> 2) | cval4); \
+      *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
+      from += 2; \
+      lo2 = GET_LO(from); \
+      *(*toP)++ = (((lo & 0x3) << 4) \
+                   | ((GET_HI(from) & 0x3) << 2) \
+                   | (lo2 >> 6) \
+                   | 0x80); \
+      *(*toP)++ = ((lo2 & 0x3f) | 0x80); \
+      break; \
+    } \
+  } \
+  *fromP = from; \
+}
+
#define PREFIX(ident) little2_ ## ident
#define MINBPC 2
#define BYTE_TYPE(enc, p) \
((p)[1] == 0 ? latin1tab[(unsigned char)*(p)] : unicode_byte_type((p)[1], (p)[0]))
+#define BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
#define CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == c)
#define IS_NAME_CHAR(enc, p, n) \
UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0])
#include "xmltok_impl.c"
+#define SET2(ptr, ch) \
+ (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
+#define GET_LO(ptr) ((unsigned char)(ptr)[0])
+#define GET_HI(ptr) ((unsigned char)(ptr)[1])
+
+DEFINE_UTF16_ENCODE
+DEFINE_UTF16_TO_UTF8
+
+#undef SET2
+#undef GET_LO
+#undef GET_HI
#undef MINBPC
#undef BYTE_TYPE
+#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NMSTRT_CHAR
-const struct encoding little2_encoding = {
- { PREFIX(prologTok), PREFIX(contentTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii), PREFIX(getAtts), PREFIX(updatePosition), 2
-};
+static const struct encoding little2_encoding = { VTABLE, 2 };
#undef PREFIX
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#define BYTE_TYPE(enc, p) \
((p)[0] == 0 ? latin1tab[(unsigned char)(p)[1]] : unicode_byte_type((p)[0], (p)[1]))
+#define BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)
#define CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == c)
#define IS_NAME_CHAR(enc, p, n) \
UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1])
#include "xmltok_impl.c"
+#define SET2(ptr, ch) \
+ (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
+#define GET_LO(ptr) ((unsigned char)(ptr)[1])
+#define GET_HI(ptr) ((unsigned char)(ptr)[0])
+
+DEFINE_UTF16_ENCODE
+DEFINE_UTF16_TO_UTF8
+
+#undef SET2
+#undef GET_LO
+#undef GET_HI
#undef MINBPC
#undef BYTE_TYPE
+#undef BYTE_TO_ASCII
#undef CHAR_MATCHES
#undef IS_NAME_CHAR
#undef IS_NMSTRT_CHAR
-const struct encoding big2_encoding = {
- { PREFIX(prologTok), PREFIX(contentTok) }, PREFIX(sameName), PREFIX(nameMatchesAscii), PREFIX(getAtts), PREFIX(updatePosition), 2
-};
+static const struct encoding big2_encoding = { VTABLE, 2 };
#undef PREFIX
normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
}
+/* Return the internal encoding object selected by e.  Only
+   XML_UTF8_ENCODING is recognised; any other value yields a null
+   pointer. */
+const ENCODING *XmlGetInternalEncoding(int e)
+{
+  if (e == XML_UTF8_ENCODING)
+    return &internal_utf8_encoding.enc;
+  return 0;
+}
+
void XmlInitEncoding(INIT_ENCODING *p, const ENCODING **encPtr)
{
p->initEnc.scanners[XML_PROLOG_STATE] = initScanProlog;
p->encPtr = encPtr;
*encPtr = &(p->initEnc);
}
+
+/* Convert the input at ptr (ending at end) to UTF-8 with room for one
+   output byte only.  Returns that byte, or -1 if the conversion produced
+   nothing. */
+static
+int toAscii(const ENCODING *enc, const char *ptr, const char *end)
+{
+  char out[1];
+  char *outPtr = out;
+  XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &outPtr, out + 1);
+  return outPtr == out ? -1 : out[0];
+}
+
+/* Return 1 if c is a space, carriage return, line feed or tab;
+   otherwise 0. */
+static
+int isSpace(int c)
+{
+  return c == ' ' || c == '\r' || c == '\n' || c == '\t';
+}
+
+/* Return 1 if there's just optional white space
+or there's an S followed by name=val.
+
+When only white space (or nothing) is found, *namePtr is set to 0.
+Otherwise *namePtr is set to the start of the name, *valPtr to the first
+character after the opening quote, and *nextTokPtr to the position after
+the closing quote.  Note that *nextTokPtr is not written in the
+white-space-only cases.
+
+Return 0 on a syntax error, with *nextTokPtr set to the offending
+position.  Characters are examined through toAscii, so a position where
+toAscii yields -1 never matches an expected character. */
+static
+int parsePseudoAttribute(const ENCODING *enc,
+ const char *ptr,
+ const char *end,
+ const char **namePtr,
+ const char **valPtr,
+ const char **nextTokPtr)
+{
+ int c;
+ char open;
+ if (ptr == end) {
+ *namePtr = 0;
+ return 1;
+ }
+ /* anything present must be introduced by white space */
+ if (!isSpace(toAscii(enc, ptr, end))) {
+ *nextTokPtr = ptr;
+ return 0;
+ }
+ do {
+ ptr += enc->minBytesPerChar;
+ } while (isSpace(toAscii(enc, ptr, end)));
+ if (ptr == end) {
+ *namePtr = 0;
+ return 1;
+ }
+ *namePtr = ptr;
+ /* scan the name up to '=', allowing white space before the '=' */
+ for (;;) {
+ c = toAscii(enc, ptr, end);
+ if (c == -1) {
+ *nextTokPtr = ptr;
+ return 0;
+ }
+ if (c == '=')
+ break;
+ if (isSpace(c)) {
+ do {
+ ptr += enc->minBytesPerChar;
+ } while (isSpace(c = toAscii(enc, ptr, end)));
+ if (c != '=') {
+ *nextTokPtr = ptr;
+ return 0;
+ }
+ break;
+ }
+ ptr += enc->minBytesPerChar;
+ }
+ /* an empty name ("=" immediately) is an error */
+ if (ptr == *namePtr) {
+ *nextTokPtr = ptr;
+ return 0;
+ }
+ /* step over '=' and any white space before the opening quote */
+ ptr += enc->minBytesPerChar;
+ c = toAscii(enc, ptr, end);
+ while (isSpace(c)) {
+ ptr += enc->minBytesPerChar;
+ c = toAscii(enc, ptr, end);
+ }
+ if (c != '"' && c != '\'') {
+ *nextTokPtr = ptr;
+ return 0;
+ }
+ open = c;
+ ptr += enc->minBytesPerChar;
+ *valPtr = ptr;
+ /* the value may contain only letters, digits, '.', '-' and '_' and
+ must be closed by the same quote character that opened it */
+ for (;; ptr += enc->minBytesPerChar) {
+ c = toAscii(enc, ptr, end);
+ if (c == open)
+ break;
+ if (!('a' <= c && c <= 'z')
+ && !('A' <= c && c <= 'Z')
+ && !('0' <= c && c <= '9')
+ && c != '.'
+ && c != '-'
+ && c != '_') {
+ *nextTokPtr = ptr;
+ return 0;
+ }
+ }
+ *nextTokPtr = ptr + enc->minBytesPerChar;
+ return 1;
+}
+
+/* Return 1 if the null-terminated strings s1 and s2 are identical,
+   otherwise 0. */
+static
+int streq(const char *s1, const char *s2)
+{
+  while (*s1 == *s2) {
+    if (*s1 == '\0')
+      return 1;
+    s1++;
+    s2++;
+  }
+  return 0;
+}
+
+/* Look up the encoding named by the text from ptr to end (in encoding
+   enc).  The name is converted to UTF-8, its letters a-z are upper-cased,
+   and it is compared against the names known here.  Returns 0 if the name
+   does not fit in the local buffer or is not recognised.  For "UTF-16"
+   the current encoding is returned when it already uses 2 bytes per
+   character; otherwise the choice between little2_encoding and
+   big2_encoding follows the byte order of the machine running this
+   code. */
+static
+const ENCODING *findEncoding(const ENCODING *enc, const char *ptr, const char *end)
+{
+#define ENCODING_MAX 128
+  char name[ENCODING_MAX];
+  char *out = name;
+  char *s;
+  /* one byte is kept back for the terminating null */
+  XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &out, name + ENCODING_MAX - 1);
+  if (ptr != end)
+    return 0;
+  *out = 0;
+  for (s = name; *s; s++) {
+    if (*s >= 'a' && *s <= 'z')
+      *s += 'A' - 'a';
+  }
+  if (streq(name, "UTF-8"))
+    return &utf8_encoding.enc;
+  if (streq(name, "ISO-8859-1"))
+    return &latin1_encoding.enc;
+  if (!streq(name, "UTF-16"))
+    return 0;
+  if (enc->minBytesPerChar == 2)
+    return enc;
+  {
+    /* the first byte of a 16-bit 1 is non-zero only on little-endian hosts */
+    static const unsigned short one = 1;
+    return *(const char *)&one ? &little2_encoding : &big2_encoding;
+  }
+}
+
+/* Parse the pseudo-attributes of a declaration lying between ptr and end.
+The first 5 and the last 2 characters of the range are skipped before
+parsing (presumably the "<?xml" and "?>" delimiters -- confirm against
+the caller).
+
+Accepted order is version, encoding, standalone.  When
+isGeneralTextEntity is non-zero the version may be missing and a
+standalone pseudo-attribute is rejected; otherwise version must come
+first.
+
+Returns 1 on success.  Returns 0 on error with *badPtr set to the
+offending position.  versionPtr, encodingName, encoding and standalone
+may each be null; those that are not null are written only when the
+corresponding pseudo-attribute is present.  *encoding receives the result
+of findEncoding, which is 0 for an unrecognised name. */
+int XmlParseXmlDecl(int isGeneralTextEntity,
+ const ENCODING *enc,
+ const char *ptr,
+ const char *end,
+ const char **badPtr,
+ const char **versionPtr,
+ const char **encodingName,
+ const ENCODING **encoding,
+ int *standalone)
+{
+ const char *val = 0;
+ const char *name = 0;
+ ptr += 5 * enc->minBytesPerChar;
+ end -= 2 * enc->minBytesPerChar;
+ /* at least one pseudo-attribute is required */
+ if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr) || !name) {
+ *badPtr = ptr;
+ return 0;
+ }
+ if (!XmlNameMatchesAscii(enc, name, "version")) {
+ if (!isGeneralTextEntity) {
+ *badPtr = name;
+ return 0;
+ }
+ }
+ else {
+ if (versionPtr)
+ *versionPtr = val;
+ if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) {
+ *badPtr = ptr;
+ return 0;
+ }
+ if (!name)
+ return 1;
+ }
+ if (XmlNameMatchesAscii(enc, name, "encoding")) {
+ /* an encoding name must start with a letter */
+ int c = toAscii(enc, val, end);
+ if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z')) {
+ *badPtr = val;
+ return 0;
+ }
+ if (encodingName)
+ *encodingName = val;
+ /* ptr is just past the closing quote, so the value ends one
+ character before it */
+ if (encoding)
+ *encoding = findEncoding(enc, val, ptr - enc->minBytesPerChar);
+ if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) {
+ *badPtr = ptr;
+ return 0;
+ }
+ if (!name)
+ return 1;
+ }
+ if (!XmlNameMatchesAscii(enc, name, "standalone") || isGeneralTextEntity) {
+ *badPtr = name;
+ return 0;
+ }
+ if (XmlNameMatchesAscii(enc, val, "yes")) {
+ if (standalone)
+ *standalone = 1;
+ }
+ else if (XmlNameMatchesAscii(enc, val, "no")) {
+ if (standalone)
+ *standalone = 0;
+ }
+ else {
+ *badPtr = val;
+ return 0;
+ }
+ /* only white space may follow the standalone pseudo-attribute */
+ while (isSpace(toAscii(enc, ptr, end)))
+ ptr += enc->minBytesPerChar;
+ if (ptr != end) {
+ *badPtr = ptr;
+ return 0;
+ }
+ return 1;
+}
#endif
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
-#define XML_TOK_NONE -3 /* The string to be scanned is empty */
+#define XML_TOK_NONE -4 /* The string to be scanned is empty */
+#define XML_TOK_TRAILING_CR -3 /* A CR at the end of the scan;
+ might be part of CRLF sequence */
#define XML_TOK_PARTIAL_CHAR -2 /* only part of a multibyte sequence */
#define XML_TOK_PARTIAL -1 /* only part of a token */
#define XML_TOK_INVALID 0
-/* The following token is returned by XmlPrologTok when it detects the end
-of the prolog and is also returned by XmlContentTok */
+/* The following tokens are returned by XmlContentTok; some are also
+ returned by XmlAttributeValueTok and XmlEntityTok */
#define XML_TOK_START_TAG_WITH_ATTS 1
#define XML_TOK_START_TAG_NO_ATTS 2
#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */
#define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4
-
-/* The following tokens are returned only by XmlContentTok */
-
#define XML_TOK_END_TAG 5
#define XML_TOK_DATA_CHARS 6
-#define XML_TOK_CDATA_SECTION 7
-#define XML_TOK_ENTITY_REF 8
-#define XML_TOK_CHAR_REF 9 /* numeric character reference */
+#define XML_TOK_DATA_NEWLINE 7
+#define XML_TOK_CDATA_SECTION 8
+#define XML_TOK_ENTITY_REF 9
+#define XML_TOK_CHAR_REF 10 /* numeric character reference */
/* The following tokens may be returned by both XmlPrologTok and XmlContentTok */
-#define XML_TOK_PI 10 /* processing instruction */
-#define XML_TOK_COMMENT 11
-#define XML_TOK_BOM 12 /* Byte order mark */
+#define XML_TOK_PI 11 /* processing instruction */
+#define XML_TOK_COMMENT 12
+#define XML_TOK_BOM 13 /* Byte order mark */
/* The following tokens are returned only by XmlPrologTok */
-#define XML_TOK_LITERAL 13
-#define XML_TOK_PARAM_ENTITY_REF 14
+#define XML_TOK_INSTANCE_START 14
#define XML_TOK_PROLOG_S 15
#define XML_TOK_DECL_OPEN 16 /* <!foo */
#define XML_TOK_DECL_CLOSE 17 /* > */
#define XML_TOK_CLOSE_PAREN 24
#define XML_TOK_OPEN_BRACKET 25
#define XML_TOK_CLOSE_BRACKET 26
+#define XML_TOK_LITERAL 27
+#define XML_TOK_PARAM_ENTITY_REF 28
+
/* The following occur only in element type declarations */
-#define XML_TOK_COMMA 27
-#define XML_TOK_CLOSE_PAREN_QUESTION 28 /* )? */
-#define XML_TOK_CLOSE_PAREN_ASTERISK 29 /* )* */
-#define XML_TOK_CLOSE_PAREN_PLUS 30 /* )+ */
-#define XML_TOK_NAME_QUESTION 31 /* name? */
-#define XML_TOK_NAME_ASTERISK 32 /* name* */
-#define XML_TOK_NAME_PLUS 33 /* name+ */
-#define XML_TOK_COND_SECT_OPEN 34 /* <![ */
-#define XML_TOK_COND_SECT_CLOSE 35 /* ]]> */
-
-#define XML_NSTATES 2
+#define XML_TOK_COMMA 29
+#define XML_TOK_NAME_QUESTION 30 /* name? */
+#define XML_TOK_NAME_ASTERISK 31 /* name* */
+#define XML_TOK_NAME_PLUS 32 /* name+ */
+#define XML_TOK_COND_SECT_OPEN 33 /* <![ */
+#define XML_TOK_COND_SECT_CLOSE 34 /* ]]> */
+#define XML_TOK_CLOSE_PAREN_QUESTION 35 /* )? */
+#define XML_TOK_CLOSE_PAREN_ASTERISK 36 /* )* */
+#define XML_TOK_CLOSE_PAREN_PLUS 37 /* )+ */
+
+
+#define XML_N_STATES 2
#define XML_PROLOG_STATE 0
#define XML_CONTENT_STATE 1
+#define XML_N_LITERAL_TYPES 2
+#define XML_ATTRIBUTE_VALUE_LITERAL 0
+#define XML_ENTITY_VALUE_LITERAL 1
+
+#define XML_N_INTERNAL_ENCODINGS 1
+#define XML_UTF8_ENCODING 0
+#if 0
+#define XML_UTF16_ENCODING 1
+#define XML_UCS4_ENCODING 2
+#endif
+
+#define XML_MAX_BYTES_PER_CHAR 4
+
typedef struct position {
/* first line and first column are 0 not 1 */
unsigned long lineNumber;
unsigned long columnNumber;
- /* if the last character counted was CR, then an immediately
- following LF should be ignored */
- int ignoreInitialLF;
} POSITION;
-typedef struct encoding {
- int (*scanners[XML_NSTATES])(const struct encoding *,
- const char *,
- const char *,
- const char **);
- int (*sameName)(const struct encoding *,
+/* One attribute of a start-tag, as filled in through XmlGetAttributes. */
+typedef struct {
+ const char *name; /* start of the attribute name */
+ const char *valuePtr; /* start of the attribute value */
+ const char *valueEnd; /* position of the quote that closes the value */
+ char containsRef; /* set to 1 when an '&' was seen for this attribute */
+} ATTRIBUTE;
+
+struct encoding;
+typedef struct encoding ENCODING;
+
+struct encoding {
+ int (*scanners[XML_N_STATES])(const ENCODING *,
+ const char *,
+ const char *,
+ const char **);
+ int (*literalScanners[XML_N_LITERAL_TYPES])(const ENCODING *,
+ const char *,
+ const char *,
+ const char **);
+ int (*sameName)(const ENCODING *,
const char *, const char *);
- int (*nameMatchesAscii)(const struct encoding *,
+ int (*nameMatchesAscii)(const ENCODING *,
const char *, const char *);
- int (*getAtts)(const struct encoding *enc, const char *ptr,
- int attsMax, const char **atts);
- void (*updatePosition)(const struct encoding *,
+ int (*nameLength)(const ENCODING *, const char *);
+ int (*getAtts)(const ENCODING *enc, const char *ptr,
+ int attsMax, ATTRIBUTE *atts);
+ int (*charRefNumber)(const ENCODING *enc, const char *ptr);
+ void (*updatePosition)(const ENCODING *,
const char *ptr,
const char *end,
POSITION *);
+ int (*isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
+ const char **badPtr);
+ int (*isSystemId)(const ENCODING *enc, const char *ptr, const char *end,
+ const char **badPtr);
+ int (*encode)(const ENCODING *enc,
+ int charNum,
+ char *buf);
+ void (*convert[XML_N_INTERNAL_ENCODINGS])(const ENCODING *enc,
+ const char **fromP,
+ const char *fromLim,
+ char **toP,
+ const char *toLim);
int minBytesPerChar;
-} ENCODING;
+};
/*
Scan the string starting at ptr until the end of the next complete token,
#define XmlContentTok(enc, ptr, end, nextTokPtr) \
XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)
+/* This is used for performing a 2nd-level tokenization on
+the content of a literal that has already been returned by XmlTok. */
+
+#define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
+ (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))
+
+#define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \
+ XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr)
+
+#define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
+ XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)
+
#define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2))
-#define XmlNameMatchesAscii(enc, ptr1, ptr2) (((enc)->nameMatchesAscii)(enc, ptr1, ptr2))
+
+#define XmlNameMatchesAscii(enc, ptr1, ptr2) \
+ (((enc)->nameMatchesAscii)(enc, ptr1, ptr2))
+
+#define XmlNameLength(enc, ptr) \
+ (((enc)->nameLength)(enc, ptr))
#define XmlGetAttributes(enc, ptr, attsMax, atts) \
(((enc)->getAtts)(enc, ptr, attsMax, atts))
+#define XmlCharRefNumber(enc, ptr) \
+ (((enc)->charRefNumber)(enc, ptr))
+
#define XmlUpdatePosition(enc, ptr, end, pos) \
(((enc)->updatePosition)(enc, ptr, end, pos))
+#define XmlIsPublicId(enc, ptr, end, badPtr) \
+ (((enc)->isPublicId)(enc, ptr, end, badPtr))
+
+#define XmlIsSystemId(enc, ptr, end, badPtr) \
+ (((enc)->isSystemId)(enc, ptr, end, badPtr))
+
+#define XmlEncode(enc, ch, buf) \
+ (((enc)->encode)(enc, ch, buf))
+
+#define XmlConvert(enc, targetEnc, fromP, fromLim, toP, toLim) \
+ (((enc)->convert[targetEnc])(enc, fromP, fromLim, toP, toLim))
+
typedef struct {
ENCODING initEnc;
const ENCODING **encPtr;
} INIT_ENCODING;
+int XMLTOKAPI XmlParseXmlDecl(int isGeneralTextEntity,
+ const ENCODING *enc,
+ const char *ptr,
+ const char *end,
+ const char **badPtr,
+ const char **versionPtr,
+ const char **encodingNamePtr,
+ const ENCODING **namedEncodingPtr,
+ int *standalonePtr);
+
void XMLTOKAPI XmlInitEncoding(INIT_ENCODING *, const ENCODING **);
+const ENCODING XMLTOKAPI *XmlGetInternalEncoding(int);
#ifdef __cplusplus
}
#define MULTIBYTE_CASES(ptr, end, ret) \
DO_LEAD_CASE(2, ptr, end, ret) \
DO_LEAD_CASE(3, ptr, end, ret) \
- DO_LEAD_CASE(4, ptr, end, ret) \
- DO_LEAD_CASE(5, ptr, end, ret) \
- DO_LEAD_CASE(6, ptr, end, ret)
+ DO_LEAD_CASE(4, ptr, end, ret)
#define INVALID_CASES(ptr, nextTokPtr) \
break; \
CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
- CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) \
- CHECK_NAME_CASE(5, enc, ptr, end, nextTokPtr) \
- CHECK_NAME_CASE(6, enc, ptr, end, nextTokPtr)
+ CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
case BT_LEAD ## n: \
break; \
CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
- CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) \
- CHECK_NMSTRT_CASE(5, enc, ptr, end, nextTokPtr) \
- CHECK_NMSTRT_CASE(6, enc, ptr, end, nextTokPtr)
+ CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
#ifndef PREFIX
#define PREFIX(ident) ident
{
if (ptr == end)
return XML_TOK_PARTIAL;
- if (CHAR_MATCHES(enc, ptr, '-'))
switch (BYTE_TYPE(enc, ptr)) {
case BT_MINUS:
return PREFIX(scanComment)(enc, ptr + MINBPC, end, nextTokPtr);
return PREFIX(scanLt)(enc, ptr + MINBPC, end, nextTokPtr);
case BT_AMP:
return PREFIX(scanRef)(enc, ptr + MINBPC, end, nextTokPtr);
+ case BT_CR:
+ ptr += MINBPC;
+ if (ptr == end)
+ return XML_TOK_TRAILING_CR;
+ if (BYTE_TYPE(enc, ptr) == BT_LF)
+ ptr += MINBPC;
+ *nextTokPtr = ptr;
+ return XML_TOK_DATA_NEWLINE;
+ case BT_LF:
+ *nextTokPtr = ptr + MINBPC;
+ return XML_TOK_DATA_NEWLINE;
case BT_RSQB:
ptr += MINBPC;
if (ptr == end)
case BT_NONXML:
case BT_MALFORM:
case BT_TRAIL:
+ case BT_CR:
+ case BT_LF:
*nextTokPtr = ptr;
return XML_TOK_DATA_CHARS;
default:
case BT_LEAD2:
case BT_LEAD3:
case BT_LEAD4:
- case BT_LEAD5:
- return PREFIX(contentTok)(enc, ptr - MINBPC, end, nextTokPtr);
+ *nextTokPtr = ptr - MINBPC;
+ return XML_TOK_INSTANCE_START;
}
*nextTokPtr = ptr;
return XML_TOK_INVALID;
}
- case BT_S: case BT_CR: case BT_LF:
+ case BT_CR:
+ if (ptr + MINBPC == end)
+ return XML_TOK_TRAILING_CR;
+ /* fall through */
+ case BT_S: case BT_LF:
for (;;) {
ptr += MINBPC;
if (ptr == end)
break;
switch (BYTE_TYPE(enc, ptr)) {
- case BT_S: case BT_CR: case BT_LF:
+ case BT_S: case BT_LF:
break;
+ case BT_CR:
+ /* don't split CR/LF pair */
+ if (ptr + MINBPC != end)
+ break;
+ /* fall through */
default:
*nextTokPtr = ptr;
return XML_TOK_PROLOG_S;
case BT_PLUS:
*nextTokPtr = ptr + MINBPC;
return XML_TOK_CLOSE_PAREN_PLUS;
+ case BT_CR: case BT_LF: case BT_S:
+ case BT_GT: case BT_COMMA: case BT_VERBAR:
+ case BT_RPAR:
+ *nextTokPtr = ptr;
+ return XML_TOK_CLOSE_PAREN;
}
*nextTokPtr = ptr;
- return XML_TOK_CLOSE_PAREN;
+ return XML_TOK_INVALID;
case BT_VERBAR:
*nextTokPtr = ptr + MINBPC;
return XML_TOK_OR;
} \
*nextTokPtr = ptr; \
return XML_TOK_INVALID;
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) LEAD_CASE(5) LEAD_CASE(6)
+ LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
case BT_NMSTRT:
case BT_HEX:
return XML_TOK_PARTIAL;
}
+/* Second-level tokenizer for the text between ptr and end (installed for
+use through XmlAttributeValueTok -- confirm in the VTABLE definition).
+Returns XML_TOK_NONE for an empty range.  A token is either a run of
+ordinary characters (XML_TOK_DATA_CHARS, ending just before the next '&',
+CR or LF or at end), or a single special item at the start of the range:
+a reference (whatever scanRef returns), a line break
+(XML_TOK_DATA_NEWLINE; CR LF counts as one), or XML_TOK_TRAILING_CR when
+a CR is the last character.  A '<' yields XML_TOK_INVALID.  *nextTokPtr
+is set to the end of the token, except that this function does not set
+it when returning XML_TOK_NONE or XML_TOK_TRAILING_CR. */
+static
+int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
+ const char **nextTokPtr)
+{
+ const char *start;
+ if (ptr == end)
+ return XML_TOK_NONE;
+ start = ptr;
+ while (ptr != end) {
+ switch (BYTE_TYPE(enc, ptr)) {
+#define LEAD_CASE(n) \
+ case BT_LEAD ## n: ptr += n; break;
+ LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
+#undef LEAD_CASE
+ case BT_AMP:
+ /* a reference is a token of its own only at the start; otherwise
+ end the preceding data run here */
+ if (ptr == start)
+ return PREFIX(scanRef)(enc, ptr + MINBPC, end, nextTokPtr);
+ *nextTokPtr = ptr;
+ return XML_TOK_DATA_CHARS;
+ case BT_LT:
+ /* this is for inside entity references */
+ *nextTokPtr = ptr;
+ return XML_TOK_INVALID;
+ case BT_LF:
+ if (ptr == start) {
+ *nextTokPtr = ptr + MINBPC;
+ return XML_TOK_DATA_NEWLINE;
+ }
+ *nextTokPtr = ptr;
+ return XML_TOK_DATA_CHARS;
+ case BT_CR:
+ if (ptr == start) {
+ ptr += MINBPC;
+ /* cannot yet tell whether an LF follows */
+ if (ptr == end)
+ return XML_TOK_TRAILING_CR;
+ if (BYTE_TYPE(enc, ptr) == BT_LF)
+ ptr += MINBPC;
+ *nextTokPtr = ptr;
+ return XML_TOK_DATA_NEWLINE;
+ }
+ *nextTokPtr = ptr;
+ return XML_TOK_DATA_CHARS;
+ default:
+ ptr += MINBPC;
+ break;
+ }
+ }
+ *nextTokPtr = ptr;
+ return XML_TOK_DATA_CHARS;
+}
+
+/* Second-level tokenizer for the text between ptr and end (installed for
+use through XmlEntityValueTok -- confirm in the VTABLE definition).
+Returns XML_TOK_NONE for an empty range.  A token is either a run of
+ordinary characters (XML_TOK_DATA_CHARS, ending just before the next '&',
+'%', CR or LF or at end), or a single special item at the start of the
+range: a reference (whatever scanRef returns for '&' or scanPercent for
+'%'), a line break (XML_TOK_DATA_NEWLINE; CR LF counts as one), or
+XML_TOK_TRAILING_CR when a CR is the last character.  *nextTokPtr is set
+to the end of the token, except that this function does not set it when
+returning XML_TOK_NONE or XML_TOK_TRAILING_CR. */
+static
+int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
+ const char **nextTokPtr)
+{
+ const char *start;
+ if (ptr == end)
+ return XML_TOK_NONE;
+ start = ptr;
+ while (ptr != end) {
+ switch (BYTE_TYPE(enc, ptr)) {
+#define LEAD_CASE(n) \
+ case BT_LEAD ## n: ptr += n; break;
+ LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
+#undef LEAD_CASE
+ case BT_AMP:
+ /* a reference is a token of its own only at the start; otherwise
+ end the preceding data run here */
+ if (ptr == start)
+ return PREFIX(scanRef)(enc, ptr + MINBPC, end, nextTokPtr);
+ *nextTokPtr = ptr;
+ return XML_TOK_DATA_CHARS;
+ case BT_PERCNT:
+ if (ptr == start)
+ return PREFIX(scanPercent)(enc, ptr + MINBPC, end, nextTokPtr);
+ *nextTokPtr = ptr;
+ return XML_TOK_DATA_CHARS;
+ case BT_LF:
+ if (ptr == start) {
+ *nextTokPtr = ptr + MINBPC;
+ return XML_TOK_DATA_NEWLINE;
+ }
+ *nextTokPtr = ptr;
+ return XML_TOK_DATA_CHARS;
+ case BT_CR:
+ if (ptr == start) {
+ ptr += MINBPC;
+ /* cannot yet tell whether an LF follows */
+ if (ptr == end)
+ return XML_TOK_TRAILING_CR;
+ if (BYTE_TYPE(enc, ptr) == BT_LF)
+ ptr += MINBPC;
+ *nextTokPtr = ptr;
+ return XML_TOK_DATA_NEWLINE;
+ }
+ *nextTokPtr = ptr;
+ return XML_TOK_DATA_CHARS;
+ default:
+ ptr += MINBPC;
+ break;
+ }
+ }
+ *nextTokPtr = ptr;
+ return XML_TOK_DATA_CHARS;
+}
+
+/* Check the characters of a public identifier literal.  The first and
+last characters of the range ptr..end are skipped (presumably the
+surrounding quotes -- confirm against the caller).  Returns 1 if every
+remaining character is acceptable; otherwise returns 0 with *badPtr set
+to the first unacceptable character. */
+static
+int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
+ const char **badPtr)
+{
+ ptr += MINBPC;
+ end -= MINBPC;
+ for (; ptr != end; ptr += MINBPC) {
+ switch (BYTE_TYPE(enc, ptr)) {
+ case BT_DIGIT:
+ case BT_HEX:
+ case BT_MINUS:
+ case BT_APOS:
+ case BT_LPAR:
+ case BT_RPAR:
+ case BT_PLUS:
+ case BT_COMMA:
+ case BT_SOL:
+ case BT_EQUALS:
+ case BT_QUEST:
+ case BT_CR:
+ case BT_LF:
+ break;
+ case BT_S:
+ /* characters of type BT_S are accepted, except tab */
+ if (CHAR_MATCHES(enc, ptr, '\t')) {
+ *badPtr = ptr;
+ return 0;
+ }
+ break;
+ case BT_NAME:
+ case BT_NMSTRT:
+ /* accept name characters only when BYTE_TO_ASCII yields a value
+ with no bits above 0x7f set and the character is not '_';
+ otherwise fall through to the error case below */
+ if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)
+ && !CHAR_MATCHES(enc, ptr, '_'))
+ break;
+ default:
+ *badPtr = ptr;
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/* Check the characters of a system identifier literal.  The first and
+last characters of the range ptr..end are skipped (presumably the
+surrounding quotes -- confirm against the caller).  Returns 1 if every
+remaining character is acceptable; otherwise returns 0 with *badPtr set
+to the first unacceptable character. */
+static
+int PREFIX(isSystemId)(const ENCODING *enc, const char *ptr, const char *end,
+ const char **badPtr)
+{
+ ptr += MINBPC;
+ end -= MINBPC;
+ for (; ptr != end; ptr += MINBPC) {
+ switch (BYTE_TYPE(enc, ptr)) {
+ case BT_DIGIT:
+ case BT_HEX:
+ case BT_MINUS:
+ case BT_APOS:
+ case BT_LPAR:
+ case BT_RPAR:
+ case BT_PLUS:
+ case BT_COMMA:
+ case BT_SOL:
+ case BT_AMP:
+ case BT_SEMI:
+ case BT_EQUALS:
+ case BT_QUEST:
+ case BT_EXCL:
+ case BT_AST:
+ case BT_PERCNT:
+ break;
+ case BT_NAME:
+ case BT_NMSTRT:
+ /* name characters are rejected when BYTE_TO_ASCII yields a value
+ with any bit above 0x7f set */
+ if (BYTE_TO_ASCII(enc, ptr) & ~0x7f) {
+ *badPtr = ptr;
+ return 0;
+ }
+ break;
+ default:
+ /* of the remaining byte types only '@' and '$' are accepted */
+ switch (BYTE_TO_ASCII(enc, ptr)) {
+ case '@':
+ case '$':
+ break;
+ default:
+ *badPtr = ptr;
+ return 0;
+ }
+ break;
+ }
+ }
+ return 1;
+}
+
/* This must only be called for a well-formed start-tag or empty element tag.
-Returns the number of attributes. Pointers to the names of up to the first
-attsMax attributes are stored in atts. */
+Returns the number of attributes. Pointers to the first attsMax attributes
+are stored in atts. */
+
static
int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
- int attsMax, const char **atts)
+ int attsMax, ATTRIBUTE *atts)
{
enum { other, inName, inValue } state = inName;
int nAtts = 0;
switch (BYTE_TYPE(enc, ptr)) {
#define START_NAME \
if (state == other) { \
- if (nAtts < attsMax) \
- atts[nAtts] = ptr; \
- ++nAtts; \
+ if (nAtts < attsMax) { \
+ atts[nAtts].name = ptr; \
+ atts[nAtts].containsRef = 0; \
+ } \
state = inName; \
}
#define LEAD_CASE(n) \
case BT_LEAD ## n: START_NAME ptr += (n - MINBPC); break;
- LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) LEAD_CASE(5) LEAD_CASE(6)
+ LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
#undef LEAD_CASE
case BT_NONASCII:
case BT_NMSTRT:
break;
#undef START_NAME
case BT_QUOT:
- if (state == other) {
+ if (state != inValue) {
+ atts[nAtts].valuePtr = ptr + MINBPC;
state = inValue;
open = BT_QUOT;
}
- else if (open == BT_QUOT)
+ else if (open == BT_QUOT) {
state = other;
+ atts[nAtts++].valueEnd = ptr;
+ }
break;
case BT_APOS:
- if (state == other) {
+ if (state != inValue) {
+ atts[nAtts].valuePtr = ptr;
state = inValue;
open = BT_APOS;
}
- else if (open == BT_APOS)
+ else if (open == BT_APOS) {
state = other;
+ atts[nAtts++].valueEnd = ptr;
+ }
+ break;
+ case BT_AMP:
+ atts[nAtts].containsRef = 1;
break;
case BT_S: case BT_CR: case BT_LF:
/* This case ensures that the first attribute name is counted
/* not reached */
}
+/* Return the character number denoted by the numeric character reference
+   starting at ptr, which must already be known to be well-formed:
+   "&#" decimal digits ";" or "&#x" hexadecimal digits ";".
+   Returns -1 if the number is 0x110000 or greater, or is not a legal XML
+   character: a value below 0x80 whose latin1tab type is BT_NONXML, a
+   surrogate (0xD800..0xDFFF), 0xFFFE or 0xFFFF. */
+static
+int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
+{
+  int result = 0;
+  /* skip &# */
+  ptr += 2*MINBPC;
+  if (CHAR_MATCHES(enc, ptr, 'x')) {
+    for (ptr += MINBPC; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC) {
+      int c = BYTE_TO_ASCII(enc, ptr);
+      switch (c) {
+      case '0': case '1': case '2': case '3': case '4':
+      case '5': case '6': case '7': case '8': case '9':
+        result <<= 4;
+        result |= (c - '0');
+        break;
+      case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+        result <<= 4;
+        result += 10 + (c - 'A');
+        break;
+      case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+        result <<= 4;
+        result += 10 + (c - 'a');
+        break;
+      }
+      /* checked after every digit so that result cannot overflow */
+      if (result >= 0x110000)
+        return -1;
+    }
+  }
+  else {
+    for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC) {
+      int c = BYTE_TO_ASCII(enc, ptr);
+      result *= 10;
+      result += (c - '0');
+      if (result >= 0x110000)
+        return -1;
+    }
+  }
+  /* Surrogate code points are not characters in their own right, so a
+     reference to one is rejected like the other non-characters. */
+  if ((result < 0x80 && latin1tab[result] == BT_NONXML)
+      || (result >= 0xD800 && result <= 0xDFFF)
+      || result == 0xFFFE
+      || result == 0xFFFF)
+    return -1;
+  return result;
+}
+
static
int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
{
case BT_LEAD ## n: \
if (*ptr1++ != *ptr2++) \
return 0;
- LEAD_CASE(6) LEAD_CASE(5) LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
+ LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
#undef LEAD_CASE
/* fall through */
if (*ptr1++ != *ptr2++)
case BT_DIGIT:
case BT_NAME:
case BT_MINUS:
- if (*ptr2 != *ptr1)
+ if (*ptr2++ != *ptr1++)
+ return 0;
+#if MINBPC > 1
+ if (*ptr2++ != *ptr1++)
+ return 0;
+#if MINBPC > 2
+ if (*ptr2++ != *ptr1++)
return 0;
- ptr1 += MINBPC;
- ptr2 += MINBPC;
+#if MINBPC > 3
+ if (*ptr2++ != *ptr1++)
+ return 0;
+#endif
+#endif
+#endif
break;
default:
+#if MINBPC == 1
if (*ptr1 == *ptr2)
return 1;
+#endif
switch (BYTE_TYPE(enc, ptr2)) {
case BT_LEAD2:
case BT_LEAD3:
case BT_LEAD4:
- case BT_LEAD5:
- case BT_LEAD6:
case BT_NONASCII:
case BT_NMSTRT:
case BT_HEX:
case BT_LEAD2:
case BT_LEAD3:
case BT_LEAD4:
- case BT_LEAD5:
- case BT_LEAD6:
case BT_NONASCII:
case BT_NMSTRT:
case BT_HEX:
}
}
+/* Return the length in bytes of the name that starts at ptr.  The name
+   ends at the first character whose byte type is not one of the name
+   character types listed in the switch below. */
+static
+int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
+{
+  const char *cur = ptr;
+  int inName = 1;
+  while (inName) {
+    switch (BYTE_TYPE(enc, cur)) {
+#define LEAD_CASE(n) \
+    case BT_LEAD ## n: cur += n; break;
+    LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
+#undef LEAD_CASE
+    case BT_NONASCII:
+    case BT_NMSTRT:
+    case BT_HEX:
+    case BT_DIGIT:
+    case BT_NAME:
+    case BT_MINUS:
+      cur += MINBPC;
+      break;
+    default:
+      /* first character that cannot be part of a name */
+      inName = 0;
+      break;
+    }
+  }
+  return cur - ptr;
+}
+
static
void PREFIX(updatePosition)(const ENCODING *enc,
const char *ptr,
const char *end,
POSITION *pos)
{
- if (pos->ignoreInitialLF) {
- if (ptr == end)
- return;
- if (CHAR_MATCHES(enc, ptr, '\n'))
- ptr += MINBPC;
- pos->ignoreInitialLF = 0;
- }
while (ptr != end) {
switch (BYTE_TYPE(enc, ptr)) {
MULTIBYTE_CASES(ptr, end, ;/* hack! */)
case BT_CR:
pos->lineNumber++;
ptr += MINBPC;
- if (ptr == end) {
- pos->ignoreInitialLF = 1;
- pos->columnNumber = 0;
- return;
- }
- pos->columnNumber = (unsigned)-1;
- if (CHAR_MATCHES(enc, ptr, '\n'))
+ if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
ptr += MINBPC;
+ pos->columnNumber = (unsigned)-1;
break;
default:
ptr += MINBPC;
BT_MALFORM,
BT_LT,
BT_AMP,
+ BT_RSQB,
BT_LEAD2,
BT_LEAD3,
BT_LEAD4,
- BT_LEAD5,
- BT_LEAD6,
BT_TRAIL,
+ BT_CR,
+ BT_LF,
BT_GT,
BT_QUOT,
BT_APOS,
BT_SEMI,
BT_NUM,
BT_LSQB,
- BT_RSQB,
BT_S,
- BT_CR,
- BT_LF,
BT_NMSTRT,
BT_HEX,
BT_DIGIT,
fprintf(stderr, "%s: out of memory\n", name);
return 0;
}
- n = _read(fd, p, nbytes);
+ n = read(fd, p, nbytes);
if (n < 0) {
perror(name);
close(fd);
#include <stdlib.h>
#include <string.h>
-#include "wfcheck.h"
-#ifdef _MSC_VER
-#define XMLTOKAPI __declspec(dllimport)
-#endif
+#include "wfcheck.h"
+#include "hashtable.h"
#include "xmltok.h"
#include "xmlrole.h"
typedef struct {
const char *name;
-} NAMED;
-
-typedef struct {
- NAMED **v;
- size_t size;
- size_t used;
- size_t usedLim;
-} HASH_TABLE;
+ const char *textPtr;
+ size_t textLen;
+ const char *systemId;
+ const char *publicId;
+ const char *notation;
+ char open;
+ char wfInContent;
+ char wfInAttribute;
+ char magic;
+} ENTITY;
-#define BLOCK_SIZE 1024
+#define INIT_BLOCK_SIZE 1024
typedef struct block {
struct block *next;
typedef struct {
BLOCK *blocks;
const char *end;
- const char *ptr;
- const char *start;
+ char *ptr;
+ char *start;
} STRING_POOL;
typedef struct {
- STRING_POOL pool;
- HASH_TABLE paramEntities;
HASH_TABLE generalEntities;
+ STRING_POOL pool;
+ int containsRef;
+ int standalone;
+ char *groupConnector;
+ size_t groupSize;
} DTD;
+/* State for one wfCheck run: wfCheck initialises it with contextInit,
+passes it to the checking functions and releases it with contextDestroy. */
+typedef struct {
+ DTD dtd;
+ size_t stackSize; /* presumably the capacity of startName -- confirm in contextInit */
+ const char **startName;
+ int attsSize; /* presumably the capacity of atts -- confirm in contextInit */
+ ATTRIBUTE *atts;
+} CONTEXT;
+
+static void poolInit(STRING_POOL *);
+static void poolDestroy(STRING_POOL *);
+static const char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
+ const char *ptr, const char *end);
+static const char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
+ const char *ptr, const char *end);
+static int poolGrow(STRING_POOL *);
+static int dtdInit(DTD *);
+static void dtdDestroy(DTD *);
+static int contextInit(CONTEXT *);
+static void contextDestroy(CONTEXT *);
+
+#define poolStart(pool) ((pool)->start)
+#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
+#define poolFinish(pool) ((pool)->start = (pool)->ptr)
+
+static enum WfCheckResult
+checkProlog(DTD *, const char *s, const char *end, const char **, const ENCODING **enc);
+static enum WfCheckResult
+checkContent(size_t level, CONTEXT *context, const ENCODING *enc,
+ const char *s, const char *end, const char **badPtr);
+static enum WfCheckResult
+checkGeneralTextEntity(CONTEXT *context,
+ const char *s, const char *end,
+ const char **nextPtr,
+ const ENCODING **enc);
+static enum WfCheckResult
+checkAttributeValue(DTD *, const ENCODING *, const char *, const char *, const char **);
static enum WfCheckResult
-checkProlog(int *tok, const char **s, const char *end, const char **nextTokP,
- const ENCODING **enc);
+checkAttributeUniqueness(CONTEXT *context, const ENCODING *enc, int nAtts,
+ const char **badPtr);
static
-void setPosition(const ENCODING *enc,
- const char *start,
- const char *end,
- const char **badPtr,
- unsigned long *badLine,
- unsigned long *badCol);
+enum WfCheckResult storeEntity(DTD *dtd,
+ const ENCODING *enc,
+ const char *entityNamePtr,
+ const char *entityNameEnd,
+ const char *entityTextPtr,
+ const char *entityTextEnd,
+ const char **badPtr);
+
enum WfCheckResult
-wfCheck(const char *s, size_t n,
+wfCheck(enum EntityType entityType, const char *s, size_t n,
const char **badPtr, unsigned long *badLine, unsigned long *badCol)
{
- enum WfCheckResult result;
- unsigned nElements = 0;
- unsigned nAtts = 0;
+ CONTEXT context;
+ const ENCODING *enc;
const char *start = s;
const char *end = s + n;
- const char *next;
- const ENCODING *enc;
- size_t stackSize = 1024;
- size_t level = 0;
- int tok;
- const char **startName = malloc(stackSize * sizeof(char *));
- int attsSize = 1024;
- const char **atts = malloc(attsSize * sizeof(char *));
-#define RETURN_CLEANUP(n) return (free((void *)startName), free((void *)atts), (n))
- if (!startName)
+ const char *next = 0;
+ enum WfCheckResult result;
+
+ if (!contextInit(&context)) {
+ contextDestroy(&context);
return noMemory;
- result = checkProlog(&tok, &s, end, &next, &enc);
- if (result) {
- setPosition(enc, start, s, badPtr, badLine, badCol);
- RETURN_CLEANUP(result);
}
+ if (entityType == documentEntity) {
+ result = checkProlog(&context.dtd, s, end, &next, &enc);
+ s = next;
+ if (!result) {
+ result = checkContent(0, &context, enc, s, end, &next);
+ s = next;
+ }
+ }
+ else {
+ result = checkGeneralTextEntity(&context, s, end, &next, &enc);
+ s = next;
+ }
+ if (result && s) {
+ POSITION pos;
+ memset(&pos, 0, sizeof(POSITION));
+ XmlUpdatePosition(enc, start, s, &pos);
+ *badPtr = s;
+ *badLine = pos.lineNumber;
+ *badCol = pos.columnNumber;
+ }
+ contextDestroy(&context);
+ return result;
+}
+
+/* Prepare a CONTEXT for a checking run: allocate the start-tag name stack
+   and the attribute buffer (1024 entries each) and initialize the DTD.
+   Returns nonzero only when all three succeeded.  wfCheck calls
+   contextDestroy even when this returns 0. */
+static
+int contextInit(CONTEXT *p)
+{
+  int dtdReady;
+
+  p->stackSize = 1024;
+  p->attsSize = 1024;
+  p->startName = malloc(p->stackSize * sizeof(char *));
+  p->atts = malloc(p->attsSize * sizeof(ATTRIBUTE));
+  /* The DTD is initialized whether or not the allocations succeeded. */
+  dtdReady = dtdInit(&(p->dtd));
+  if (!dtdReady)
+    return 0;
+  return p->atts != 0 && p->startName != 0;
+}
+
+/* Release what contextInit set up: the DTD first, then the start-tag name
+   stack and the attribute buffer. */
+static
+void contextDestroy(CONTEXT *p)
+{
+  DTD *dtd = &(p->dtd);
+  void *nameStack = (void *)p->startName;
+  void *attBuffer = (void *)p->atts;
+
+  dtdDestroy(dtd);
+  free(nameStack);
+  free(attBuffer);
+}
+
+static enum WfCheckResult
+checkContent(size_t level, CONTEXT *context, const ENCODING *enc,
+ const char *s, const char *end, const char **badPtr)
+{
+ size_t startLevel = level;
+ const char *next;
+ int tok = XmlContentTok(enc, s, end, &next);
for (;;) {
switch (tok) {
+ case XML_TOK_TRAILING_CR:
case XML_TOK_NONE:
- setPosition(enc, start, s, badPtr, badLine, badCol);
- RETURN_CLEANUP(noElements);
+ if (startLevel > 0) {
+ if (level != startLevel) {
+ *badPtr = s;
+ return asyncEntity;
+ }
+ return wellFormed;
+ }
+ *badPtr = s;
+ return noElements;
case XML_TOK_INVALID:
- setPosition(enc, start, next, badPtr, badLine, badCol);
- RETURN_CLEANUP(invalidToken);
+ *badPtr = next;
+ return invalidToken;
case XML_TOK_PARTIAL:
- setPosition(enc, start, s, badPtr, badLine, badCol);
- RETURN_CLEANUP(unclosedToken);
+ *badPtr = s;
+ return unclosedToken;
case XML_TOK_PARTIAL_CHAR:
- setPosition(enc, start, s, badPtr, badLine, badCol);
- RETURN_CLEANUP(partialChar);
+ *badPtr = s;
+ return partialChar;
case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
- nElements++;
break;
+ case XML_TOK_ENTITY_REF:
+ {
+ const char *name = poolStoreString(&context->dtd.pool, enc,
+ s + enc->minBytesPerChar,
+ next - enc->minBytesPerChar);
+ ENTITY *entity = (ENTITY *)lookup(&context->dtd.generalEntities, name, 0);
+ poolDiscard(&context->dtd.pool);
+ if (!entity) {
+ if (!context->dtd.containsRef || context->dtd.standalone) {
+ *badPtr = s;
+ return undefinedEntity;
+ }
+ break;
+ }
+ if (entity->wfInContent)
+ break;
+ if (entity->open) {
+ *badPtr = s;
+ return recursiveEntityRef;
+ }
+ if (entity->notation) {
+ *badPtr = s;
+ return binaryEntityRef;
+ }
+ if (entity) {
+ if (entity->textPtr) {
+ enum WfCheckResult result;
+ const ENCODING *internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING);
+ entity->open = 1;
+ result = checkContent(level, context, internalEnc,
+ entity->textPtr, entity->textPtr + entity->textLen,
+ badPtr);
+ entity->open = 0;
+ if (result && *badPtr) {
+ *badPtr = s;
+ return result;
+ }
+ entity->wfInContent = 1;
+ }
+ }
+ break;
+ }
case XML_TOK_START_TAG_NO_ATTS:
- nElements++;
- if (level == stackSize) {
- startName = realloc((void *)startName, (stackSize *= 2) * sizeof(char *));
- if (!startName) {
- free((void *)atts);
+ if (level == context->stackSize) {
+ context->startName
+ = realloc((void *)context->startName, (context->stackSize *= 2) * sizeof(char *));
+ if (!context->startName)
return noMemory;
- }
}
- startName[level++] = s + enc->minBytesPerChar;
+ context->startName[level++] = s + enc->minBytesPerChar;
break;
case XML_TOK_START_TAG_WITH_ATTS:
- if (level == stackSize) {
- startName = realloc((void *)startName, (stackSize *= 2) * sizeof(char *));
- if (!startName) {
- free((void *)atts);
+ if (level == context->stackSize) {
+ context->startName = realloc((void *)context->startName, (context->stackSize *= 2) * sizeof(char *));
+ if (!context->startName)
return noMemory;
- }
}
- startName[level++] = s + enc->minBytesPerChar;
+ context->startName[level++] = s + enc->minBytesPerChar;
/* fall through */
case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
- nElements++;
{
int i;
- int n = XmlGetAttributes(enc, s, attsSize, atts);
- nAtts += n;
- if (n > attsSize) {
- attsSize = 2*n;
- atts = realloc((void *)atts, attsSize * sizeof(char *));
- if (!atts) {
- free((void *)startName);
+ int n = XmlGetAttributes(enc, s, context->attsSize, context->atts);
+ if (n > context->attsSize) {
+ context->attsSize = 2*n;
+ context->atts = realloc((void *)context->atts, context->attsSize * sizeof(ATTRIBUTE));
+ if (!context->atts)
return noMemory;
+ XmlGetAttributes(enc, s, n, context->atts);
+ }
+ for (i = 0; i < n; i++) {
+ if (context->atts[i].containsRef) {
+ enum WfCheckResult result
+ = checkAttributeValue(&context->dtd, enc,
+ context->atts[i].valuePtr,
+ context->atts[i].valueEnd,
+ badPtr);
+ if (result)
+ return result;
}
- XmlGetAttributes(enc, s, n, atts);
- }
- for (i = 1; i < n; i++) {
- int j;
- for (j = 0; j < i; j++) {
- if (XmlSameName(enc, atts[i], atts[j])) {
- setPosition(enc, start, atts[i], badPtr, badLine, badCol);
- RETURN_CLEANUP(duplicateAttribute);
- }
- }
+ }
+ if (i > 1) {
+ enum WfCheckResult result = checkAttributeUniqueness(context, enc, n, badPtr);
+ if (result)
+ return result;
}
}
break;
case XML_TOK_END_TAG:
+ if (level == startLevel) {
+ *badPtr = s;
+ return asyncEntity;
+ }
--level;
- if (!XmlSameName(enc, startName[level], s + enc->minBytesPerChar * 2)) {
- setPosition(enc, start, s, badPtr, badLine, badCol);
- RETURN_CLEANUP(tagMismatch);
+ if (!XmlSameName(enc, context->startName[level], s + enc->minBytesPerChar * 2)) {
+ *badPtr = s;
+ return tagMismatch;
+ }
+ break;
+ case XML_TOK_CHAR_REF:
+ if (XmlCharRefNumber(enc, s) < 0) {
+ *badPtr = s;
+ return badCharRef;
+ }
+ break;
+ case XML_TOK_PI:
+ if (XmlNameMatchesAscii(enc, s + 2 * enc->minBytesPerChar, "xml")) {
+ *badPtr = s;
+ return misplacedXmlPi;
}
break;
}
do {
tok = XmlPrologTok(enc, s, end, &next);
switch (tok) {
+ case XML_TOK_TRAILING_CR:
case XML_TOK_NONE:
- RETURN_CLEANUP(wellFormed);
+ return wellFormed;
case XML_TOK_PROLOG_S:
case XML_TOK_COMMENT:
case XML_TOK_PI:
break;
default:
if (tok > 0) {
- setPosition(enc, start, s, badPtr, badLine, badCol);
- RETURN_CLEANUP(junkAfterDocElement);
+ *badPtr = s;
+ return junkAfterDocElement;
}
break;
}
tok = XmlContentTok(enc, s, end, &next);
}
/* not reached */
- return 0;
}
static
-int checkProlog(int *tokp,
- const char **startp, const char *end,
- const char **nextTokP, const ENCODING **enc)
+int attcmp(const void *p1, const void *p2)
+{
+  /* qsort comparator over ATTRIBUTE records.  The name length is taken as
+     valuePtr - name, so the caller must have stored the end of the name in
+     valuePtr.  Shorter names sort first; equal-length names are ordered by
+     their bytes. */
+  const ATTRIBUTE *lhs = p1;
+  const ATTRIBUTE *rhs = p2;
+  size_t lhsLen = lhs->valuePtr - lhs->name;
+  size_t rhsLen = rhs->valuePtr - rhs->name;
+  int cmp;
+
+  if (lhsLen != rhsLen)
+    return lhsLen < rhsLen ? -1 : 1;
+  cmp = memcmp(lhs->name, rhs->name, lhsLen);
+  if (cmp)
+    return cmp;
+  /* Identical names are ordered by their position in the input, so that
+     the first duplicate attribute is always the one reported. */
+  if (lhs->name == rhs->name)
+    return 0;
+  return lhs->name < rhs->name ? -1 : 1;
+}
+
+/* Check that no name occurs twice among the first nAtts entries of
+   context->atts.  Returns duplicateAttribute with *badPtr pointing at the
+   offending name, otherwise wellFormed.
+   Note that this trashes the attribute values: with QSORT_MIN_ATTS or more
+   attributes the array is sorted and every valuePtr is overwritten. */
+
+static enum WfCheckResult
+checkAttributeUniqueness(CONTEXT *context, const ENCODING *enc, int nAtts,
+                         const char **badPtr)
{
+#define QSORT_MIN_ATTS 10
+  ATTRIBUTE *atts = context->atts;
+  const char *firstDup = 0;
+  int i;
+
+  if (nAtts < QSORT_MIN_ATTS) {
+    /* Few attributes: compare each one against all of its predecessors. */
+    int j;
+    for (i = 1; i < nAtts; i++) {
+      for (j = 0; j < i; j++) {
+        if (XmlSameName(enc, atts[i].name, atts[j].name)) {
+          *badPtr = atts[i].name;
+          return duplicateAttribute;
+        }
+      }
+    }
+    return wellFormed;
+  }
+
+  /* Store the end of the name in valuePtr; attcmp relies on it. */
+  for (i = 0; i < nAtts; i++)
+    atts[i].valuePtr = atts[i].name + XmlNameLength(enc, atts[i].name);
+  qsort(atts, nAtts, sizeof(ATTRIBUTE), attcmp);
+  /* After sorting, equal names are adjacent.  Keep the duplicate that
+     appears earliest in the input. */
+  for (i = 1; i < nAtts; i++) {
+    const char *name = atts[i].name;
+    if (!XmlSameName(enc, name, atts[i - 1].name))
+      continue;
+    if (!firstDup || name < firstDup)
+      firstDup = name;
+  }
+  if (!firstDup)
+    return wellFormed;
+  *badPtr = firstDup;
+  return duplicateAttribute;
+}
+
+static enum WfCheckResult
+checkProlog(DTD *dtd, const char *s, const char *end,
+ const char **nextPtr, const ENCODING **enc)
+{
+ const char *entityNamePtr, *entityNameEnd;
PROLOG_STATE state;
- const char *s = *startp;
+ ENTITY *entity;
INIT_ENCODING initEnc;
XmlInitEncoding(&initEnc, enc);
XmlPrologStateInit(&state);
for (;;) {
- int tok = XmlPrologTok(*enc, s, end, nextTokP);
+ const char *next;
+ int tok = XmlPrologTok(*enc, s, end, &next);
+ if (tok != XML_TOK_PROLOG_S) {
+ switch (XmlTokenRole(&state, tok, s, next, *enc)) {
+ case XML_ROLE_XML_DECL:
+ {
+ const char *encodingName = 0;
+ const ENCODING *encoding = 0;
+ const char *version;
+ int standalone = -1;
+ if (!XmlParseXmlDecl(0,
+ *enc,
+ s,
+ next,
+ nextPtr,
+ &version,
+ &encodingName,
+ &encoding,
+ &standalone))
+ return syntaxError;
+ if (encoding) {
+ if (encoding->minBytesPerChar != (*enc)->minBytesPerChar) {
+ *nextPtr = encodingName;
+ return incorrectEncoding;
+ }
+ *enc = encoding;
+ }
+ else if (encodingName) {
+ *nextPtr = encodingName;
+ return unknownEncoding;
+ }
+ if (standalone == 1)
+ dtd->standalone = 1;
+ break;
+ }
+ case XML_ROLE_DOCTYPE_SYSTEM_ID:
+ if (!XmlIsSystemId(*enc, s, next, nextPtr))
+ return syntaxError;
+ dtd->containsRef = 1;
+ break;
+ case XML_ROLE_NOTATION_SYSTEM_ID:
+ if (!XmlIsSystemId(*enc, s, next, nextPtr))
+ return syntaxError;
+ break;
+ case XML_ROLE_DOCTYPE_PUBLIC_ID:
+ case XML_ROLE_ENTITY_PUBLIC_ID:
+ case XML_ROLE_NOTATION_PUBLIC_ID:
+ if (!XmlIsPublicId(*enc, s, next, nextPtr))
+ return syntaxError;
+ break;
+ case XML_ROLE_INSTANCE_START:
+ *nextPtr = s;
+ return wellFormed;
+ case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
+ case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
+ {
+ const char *tem = 0;
+ enum WfCheckResult result
+ = checkAttributeValue(dtd, *enc, s + (*enc)->minBytesPerChar,
+ next - (*enc)->minBytesPerChar,
+ &tem);
+ if (result) {
+ if (tem)
+ *nextPtr = tem;
+ return result;
+ }
+ break;
+ }
+ case XML_ROLE_ENTITY_VALUE:
+ {
+ enum WfCheckResult result
+ = storeEntity(dtd,
+ *enc,
+ entityNamePtr,
+ entityNameEnd,
+ s,
+ next,
+ nextPtr);
+ if (result != wellFormed)
+ return result;
+ }
+ break;
+ case XML_ROLE_ENTITY_SYSTEM_ID:
+ if (!XmlIsSystemId(*enc, s, next, nextPtr))
+ return syntaxError;
+ if (entityNamePtr) {
+ const char *name = poolStoreString(&dtd->pool, *enc, entityNamePtr, entityNameEnd);
+ entity = (ENTITY *)lookup(&dtd->generalEntities, name, sizeof(ENTITY));
+ if (entity->name != name) {
+ poolDiscard(&dtd->pool);
+ entity = 0;
+ }
+ else {
+ poolFinish(&dtd->pool);
+ entity->systemId = poolStoreString(&dtd->pool, *enc,
+ s + (*enc)->minBytesPerChar,
+ next - (*enc)->minBytesPerChar);
+ poolFinish(&dtd->pool);
+ }
+ }
+ break;
+ case XML_ROLE_ENTITY_NOTATION_NAME:
+ if (entity) {
+ entity->notation = poolStoreString(&dtd->pool, *enc, s, next);
+ poolFinish(&dtd->pool);
+ }
+ break;
+ case XML_ROLE_GENERAL_ENTITY_NAME:
+ entityNamePtr = s;
+ entityNameEnd = next;
+ break;
+ case XML_ROLE_PARAM_ENTITY_NAME:
+ entityNamePtr = 0;
+ entityNameEnd = 0;
+ break;
+ case XML_ROLE_ERROR:
+ *nextPtr = s;
+ switch (tok) {
+ case XML_TOK_COND_SECT_OPEN:
+ return condSect;
+ case XML_TOK_PARAM_ENTITY_REF:
+ return paramEntityRef;
+ case XML_TOK_INVALID:
+ *nextPtr = next;
+ return invalidToken;
+ case XML_TOK_NONE:
+ return noElements;
+ case XML_TOK_PARTIAL:
+ return unclosedToken;
+ case XML_TOK_PARTIAL_CHAR:
+ return partialChar;
+ case XML_TOK_TRAILING_CR:
+ *nextPtr = s + (*enc)->minBytesPerChar;
+ return noElements;
+ case XML_TOK_PI:
+ if (XmlNameMatchesAscii(*enc, s + 2 * (*enc)->minBytesPerChar, "xml"))
+ return misplacedXmlPi;
+ default:
+ return syntaxError;
+ }
+ case XML_ROLE_GROUP_OPEN:
+ if (state.level >= dtd->groupSize) {
+ if (dtd->groupSize)
+ dtd->groupConnector = realloc(dtd->groupConnector, dtd->groupSize *= 2);
+ else
+ dtd->groupConnector = malloc(dtd->groupSize = 32);
+ if (!dtd->groupConnector)
+ return noMemory;
+ }
+ dtd->groupConnector[state.level] = 0;
+ break;
+ case XML_ROLE_GROUP_SEQUENCE:
+ if (dtd->groupConnector[state.level] == '|') {
+ *nextPtr = s;
+ return syntaxError;
+ }
+ dtd->groupConnector[state.level] = ',';
+ break;
+ case XML_ROLE_GROUP_CHOICE:
+ if (dtd->groupConnector[state.level] == ',') {
+ *nextPtr = s;
+ return syntaxError;
+ }
+ dtd->groupConnector[state.level] = '|';
+ break;
+ case XML_ROLE_NONE:
+ if (tok == XML_TOK_PARAM_ENTITY_REF)
+ dtd->containsRef = 1;
+ break;
+ }
+ }
+ s = next;
+ }
+ /* not reached */
+}
+
+/* Check an entity supplied as a general text entity rather than a document.
+   A leading byte order mark is skipped, and an initial "<?xml ...?>"
+   processing instruction is parsed with XmlParseXmlDecl and may switch
+   *enc.  The remainder is handed to checkContent at level 1, so reaching
+   the end of input is not reported as noElements.  On error *nextPtr
+   receives the position to report. */
+static enum WfCheckResult
+checkGeneralTextEntity(CONTEXT *context,
+                       const char *s, const char *end,
+                       const char **nextPtr,
+                       const ENCODING **enc)
+{
+  INIT_ENCODING initEnc;
+  const char *tokEnd;
+  int tok;
+
+  XmlInitEncoding(&initEnc, enc);
+  tok = XmlContentTok(*enc, s, end, &tokEnd);
+
+  /* Step over a byte order mark and look at the token after it. */
+  if (tok == XML_TOK_BOM) {
+    s = tokEnd;
+    tok = XmlContentTok(*enc, s, end, &tokEnd);
+  }
+  if (tok == XML_TOK_PI
+      && XmlNameMatchesAscii(*enc, s + 2 * (*enc)->minBytesPerChar, "xml")) {
+    const char *version;
+    const char *declEncodingName = 0;
+    const ENCODING *declEncoding = 0;
+    if (!XmlParseXmlDecl(1,
+                         *enc,
+                         s,
+                         tokEnd,
+                         nextPtr,
+                         &version,
+                         &declEncodingName,
+                         &declEncoding,
+                         0))
+      return syntaxError;
+    if (!declEncoding) {
+      /* An encoding was named but no ENCODING was produced for it. */
+      if (declEncodingName) {
+        *nextPtr = declEncodingName;
+        return unknownEncoding;
+      }
+    }
+    else {
+      /* The declared encoding must use the same minimum character width
+         as the encoding already in use. */
+      if (declEncoding->minBytesPerChar != (*enc)->minBytesPerChar) {
+        *nextPtr = declEncodingName;
+        return incorrectEncoding;
+      }
+      *enc = declEncoding;
+    }
+    s = tokEnd;
+  }
+  /* With containsRef set, checkContent does not report unknown entity
+     names as undefinedEntity unless standalone is also set. */
+  context->dtd.containsRef = 1;
+  return checkContent(1, context, *enc, s, end, nextPtr);
+}
+
+static enum WfCheckResult
+checkAttributeValue(DTD *dtd, const ENCODING *enc,
+ const char *ptr, const char *end, const char **badPtr)
+{
+ for (;;) {
+ const char *next;
+ int tok = XmlAttributeValueTok(enc, ptr, end, &next);
switch (tok) {
- case XML_TOK_START_TAG_WITH_ATTS:
- case XML_TOK_START_TAG_NO_ATTS:
- case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
- case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
- case XML_TOK_INVALID:
+ case XML_TOK_TRAILING_CR:
case XML_TOK_NONE:
- case XML_TOK_PARTIAL:
- *tokp = tok;
- *startp = s;
return wellFormed;
- case XML_TOK_BOM:
- case XML_TOK_PROLOG_S:
+ case XML_TOK_INVALID:
+ *badPtr = next;
+ return invalidToken;
+ case XML_TOK_PARTIAL:
+ *badPtr = ptr;
+ return invalidToken;
+ case XML_TOK_CHAR_REF:
+ if (XmlCharRefNumber(enc, ptr) < 0) {
+ *badPtr = ptr;
+ return badCharRef;
+ }
break;
- default:
- switch (XmlTokenRole(&state, tok, s, *nextTokP, *enc)) {
- case XML_ROLE_ERROR:
- *startp = s;
- return syntaxError;
+ case XML_TOK_DATA_CHARS:
+ case XML_TOK_DATA_NEWLINE:
+ break;
+ case XML_TOK_ENTITY_REF:
+ {
+ const char *name = poolStoreString(&dtd->pool, enc,
+ ptr + enc->minBytesPerChar,
+ next - enc->minBytesPerChar);
+ ENTITY *entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0);
+ poolDiscard(&dtd->pool);
+ if (!entity) {
+ if (!dtd->containsRef) {
+ *badPtr = ptr;
+ return undefinedEntity;
+ }
+ break;
+ }
+ if (entity->wfInAttribute)
+ break;
+ if (entity->open) {
+ *badPtr = ptr;
+ return recursiveEntityRef;
+ }
+ if (entity->notation) {
+ *badPtr = ptr;
+ return binaryEntityRef;
+ }
+ if (entity) {
+ if (entity->textPtr) {
+ enum WfCheckResult result;
+ const ENCODING *internalEnc = XmlGetInternalEncoding(XML_UTF8_ENCODING);
+ const char *textEnd = entity->textPtr + entity->textLen;
+ entity->open = 1;
+ result = checkAttributeValue(dtd, internalEnc, entity->textPtr, textEnd, badPtr);
+ entity->open = 0;
+ if (result && *badPtr) {
+ *badPtr = ptr;
+ return result;
+ }
+ entity->wfInAttribute = 1;
+ }
+ else {
+ *badPtr = ptr;
+ return attributeExternalEntityRef;
+ }
+ }
+ break;
}
break;
+ default:
+ abort();
}
- s = *nextTokP;
+ ptr = next;
}
/* not reached */
}
static
-void setPosition(const ENCODING *enc,
- const char *start, const char *end,
- const char **badPtr, unsigned long *badLine, unsigned long *badCol)
-{
- POSITION pos;
- memset(&pos, 0, sizeof(POSITION));
- XmlUpdatePosition(enc, start, end, &pos);
- *badPtr = end;
- *badLine = pos.lineNumber;
- *badCol = pos.columnNumber;
+void poolInit(STRING_POOL *pool)
+{
+  /* Put the pool in its empty state: no blocks, and null start, ptr and
+     end pointers. */
+  pool->end = 0;
+  pool->ptr = pool->start = 0;
+  pool->blocks = 0;
+}
+
+/* Free every block on the pool's list and return the pool to the empty
+   state (all pointers null). */
+static
+void poolDestroy(STRING_POOL *pool)
+{
+  BLOCK *cur;
+  BLOCK *following;
+
+  for (cur = pool->blocks; cur; cur = following) {
+    /* Read the link before the block is released. */
+    following = cur->next;
+    free(cur);
+  }
+  pool->blocks = 0;
+  pool->start = 0;
+  pool->ptr = 0;
+  pool->end = 0;
+}
+
+/* Append the characters in [ptr, end) to the string under construction in
+   the pool, converting them with XmlConvert (XML_UTF8_ENCODING) and growing
+   the pool whenever the conversion stops before reaching end.
+   Returns the start of the string under construction, or 0 if the pool
+   could not be grown. */
+static
+const char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
+                       const char *ptr, const char *end)
+{
+  /* A pool that has never been grown has a null start pointer.  Without
+     this, appending an empty range to such a pool would return that null
+     pointer and the caller would mistake it for an allocation failure. */
+  if (!pool->ptr && !poolGrow(pool))
+    return 0;
+  for (;;) {
+    XmlConvert(enc, XML_UTF8_ENCODING, &ptr, end, &(pool->ptr), pool->end);
+    if (ptr == end)
+      break;
+    if (!poolGrow(pool))
+      return 0;
+  }
+  return pool->start;
+}
+
+/* Append [ptr, end) to the pool with poolAppend and terminate the result
+   with a 0 byte.  Returns the start of the string, or 0 if memory ran out.
+   The string is left under construction; callers commit it with poolFinish
+   or drop it with poolDiscard. */
+static
+const char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
+                            const char *ptr, const char *end)
+{
+  const char *appended = poolAppend(pool, enc, ptr, end);
+
+  if (!appended)
+    return 0;
+  /* Make room for the terminator when the block is exactly full. */
+  if (pool->ptr == pool->end) {
+    if (!poolGrow(pool))
+      return 0;
+  }
+  *(pool->ptr) = 0;
+  pool->ptr++;
+  /* Growing may have moved the string, so re-read start. */
+  return pool->start;
+}
+
+/* Make more room for the string under construction.
+   If that string starts at the beginning of the newest block, the block is
+   enlarged in place to twice its size with realloc.  Otherwise a new block
+   is allocated (at least INIT_BLOCK_SIZE bytes, else twice the current
+   span), pushed onto the block list, and the partial string is copied in.
+   Returns 1 on success.  Returns 0 if allocation fails, in which case the
+   pool is left exactly as it was, so poolDestroy can still free it. */
+static
+int poolGrow(STRING_POOL *pool)
+{
+  /* Measure against the current pointers before any reallocation can
+     invalidate them. */
+  size_t used = pool->ptr - pool->start;
+  size_t blockSize = pool->end - pool->start;
+
+  if (pool->blocks && pool->start == pool->blocks->s) {
+    BLOCK *grown;
+    blockSize *= 2;
+    /* Keep the old list head until realloc has succeeded; assigning the
+       result directly would lose every block on failure. */
+    grown = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize);
+    if (!grown)
+      return 0;
+    pool->blocks = grown;
+    pool->start = grown->s;
+  }
+  else {
+    BLOCK *tem;
+    if (blockSize < INIT_BLOCK_SIZE)
+      blockSize = INIT_BLOCK_SIZE;
+    else
+      blockSize *= 2;
+    tem = malloc(offsetof(BLOCK, s) + blockSize);
+    if (!tem)
+      return 0;
+    tem->next = pool->blocks;
+    pool->blocks = tem;
+    /* On the first growth start is null and there is nothing to copy. */
+    if (used)
+      memcpy(tem->s, pool->start, used);
+    pool->start = tem->s;
+  }
+  pool->ptr = pool->start + used;
+  pool->end = pool->start + blockSize;
+  return 1;
+}
+
+/* Initialize a DTD: empty string pool, empty general-entity table, cleared
+   flags, no group-connector buffer, and the five predefined entities
+   (lt, amp, gt, quot, apos) entered with one-character replacement text
+   and marked as already checked for content and attribute use.
+   Returns 1 on success, 0 if an entity entry could not be obtained.  Every
+   field is set before anything can fail, so dtdDestroy is safe to call
+   after a failed dtdInit (wfCheck does exactly that). */
+static int dtdInit(DTD *dtd)
+{
+  static const char *names[] = { "lt", "amp", "gt", "quot", "apos" };
+  static const char chars[] = { '<', '&', '>', '"', '\'' };
+  int i;
+
+  /* standalone is read by checkContent and is only ever set to 1 by
+     checkProlog, so it has to start out as 0. */
+  dtd->containsRef = 0;
+  dtd->standalone = 0;
+  dtd->groupSize = 0;
+  dtd->groupConnector = 0;
+  poolInit(&(dtd->pool));
+  hashTableInit(&(dtd->generalEntities));
+  for (i = 0; i < 5; i++) {
+    ENTITY *entity = (ENTITY *)lookup(&(dtd->generalEntities), names[i], sizeof(ENTITY));
+    if (!entity)
+      return 0;
+    entity->textPtr = chars + i;
+    entity->textLen = 1;
+    entity->magic = 1;
+    entity->wfInContent = 1;
+    entity->wfInAttribute = 1;
+  }
+  return 1;
+}
+
+/* Release the DTD's string pool, its general-entity table and its
+   group-connector buffer, in that order. */
+static void dtdDestroy(DTD *dtd)
+{
+  STRING_POOL *pool = &(dtd->pool);
+  HASH_TABLE *entities = &(dtd->generalEntities);
+
+  poolDestroy(pool);
+  hashTableDestroy(entities);
+  free(dtd->groupConnector);
+}
+
+/* Tokenize the literal value of an entity declaration and, for a general
+   entity seen for the first time, record its replacement text in the DTD.
+
+   entityNamePtr/entityNameEnd delimit the entity name; entityNamePtr is 0
+   when the text should only be checked, not stored.  entityTextPtr and
+   entityTextEnd delimit the literal including its quotes, which are
+   stripped here.  If the name is already in the table the earlier entry is
+   kept and the new text is checked and then discarded.
+
+   Character references are replaced by their encoded character, newlines
+   are stored as '\n', and entity references are copied through unexpanded.
+   Returns wellFormed, noMemory, or an error code with *badPtr set to the
+   offending position. */
+static
+enum WfCheckResult storeEntity(DTD *dtd,
+                               const ENCODING *enc,
+                               const char *entityNamePtr,
+                               const char *entityNameEnd,
+                               const char *entityTextPtr,
+                               const char *entityTextEnd,
+                               const char **badPtr)
+{
+  ENTITY *entity = 0;
+  const ENCODING *utf8 = XmlGetInternalEncoding(XML_UTF8_ENCODING);
+  STRING_POOL *pool = &(dtd->pool);
+  if (entityNamePtr) {
+    if (!poolStoreString(pool, enc, entityNamePtr, entityNameEnd))
+      return noMemory;
+    entity = (ENTITY *)lookup(&(dtd->generalEntities), pool->start, sizeof(ENTITY));
+    /* lookup can return 0 (dtdInit checks for it as well); do not
+       dereference the result in that case. */
+    if (!entity) {
+      poolDiscard(pool);
+      return noMemory;
+    }
+    if (entity->name != pool->start) {
+      /* The name was already in the table: drop our copy of it and only
+         check the text from here on. */
+      poolDiscard(pool);
+      entityNamePtr = 0;
+    }
+    else
+      poolFinish(pool);
+  }
+  /* Step inside the quotes that delimit the literal. */
+  entityTextPtr += enc->minBytesPerChar;
+  entityTextEnd -= enc->minBytesPerChar;
+  for (;;) {
+    const char *next;
+    int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
+    switch (tok) {
+    case XML_TOK_PARAM_ENTITY_REF:
+      *badPtr = entityTextPtr;
+      return syntaxError;
+    case XML_TOK_NONE:
+      /* End of the literal: commit the accumulated text or throw it away. */
+      if (entityNamePtr) {
+        entity->textPtr = pool->start;
+        entity->textLen = pool->ptr - pool->start;
+        poolFinish(pool);
+      }
+      else
+        poolDiscard(pool);
+      return wellFormed;
+    case XML_TOK_ENTITY_REF:
+    case XML_TOK_DATA_CHARS:
+      if (!poolAppend(pool, enc, entityTextPtr, next))
+        return noMemory;
+      break;
+    case XML_TOK_TRAILING_CR:
+      next = entityTextPtr + enc->minBytesPerChar;
+      /* fall through */
+    case XML_TOK_DATA_NEWLINE:
+      if (pool->end == pool->ptr && !poolGrow(pool))
+        return noMemory;
+      *(pool->ptr)++ = '\n';
+      break;
+    case XML_TOK_CHAR_REF:
+      {
+        char buf[XML_MAX_BYTES_PER_CHAR];
+        int i;
+        int n = XmlCharRefNumber(enc, entityTextPtr);
+        if (n < 0) {
+          *badPtr = entityTextPtr;
+          return badCharRef;
+        }
+        n = XmlEncode(utf8, n, buf);
+        if (!n) {
+          *badPtr = entityTextPtr;
+          return badCharRef;
+        }
+        for (i = 0; i < n; i++) {
+          if (pool->end == pool->ptr && !poolGrow(pool))
+            return noMemory;
+          *(pool->ptr)++ = buf[i];
+        }
+      }
+      break;
+    case XML_TOK_PARTIAL:
+      *badPtr = entityTextPtr;
+      return invalidToken;
+    case XML_TOK_INVALID:
+      *badPtr = next;
+      return invalidToken;
+    default:
+      abort();
+    }
+    entityTextPtr = next;
+  }
+  /* not reached */
}
partialChar,
tagMismatch,
duplicateAttribute,
- junkAfterDocElement
+ junkAfterDocElement,
+ paramEntityRef,
+ condSect,
+ undefinedEntity,
+ recursiveEntityRef,
+ asyncEntity,
+ badCharRef,
+ binaryEntityRef,
+ attributeExternalEntityRef,
+ misplacedXmlPi,
+ unknownEncoding,
+ incorrectEncoding
};
-enum WfCheckResult wfCheck(const char *s, size_t n,
+enum EntityType {
+ documentEntity,
+ generalTextEntity
+};
+
+enum WfCheckResult wfCheck(enum EntityType entityType,
+ const char *s, size_t n,
const char **errorPtr,
unsigned long *errorLineNumber,
unsigned long *errorColNumber);
+const char *wfCheckMessage(enum WfCheckResult);
#include <stdio.h>
#include "filemap.h"
+static void win32perror(const char *);
+
int filemap(const char *name,
void (*processor)(const void *, size_t, const char *, void *arg),
void *arg)
f = CreateFile(name, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING,
FILE_FLAG_SEQUENTIAL_SCAN, NULL);
if (f == INVALID_HANDLE_VALUE) {
- fprintf(stderr, "%s: CreateFile failed\n", name);
+ win32perror(name);
return 0;
}
size = GetFileSize(f, &sizeHi);
+ if (size == (DWORD)-1) {
+ win32perror(name);
+ return 0;
+ }
if (sizeHi) {
- fprintf(stderr, "%s: too big (limit 2Gb)\n", name);
+ fprintf(stderr, "%s: bigger than 2Gb\n", name);
return 0;
}
/* CreateFileMapping barfs on zero length files */
if (size == 0) {
- fprintf(stderr, "%s: zero-length file\n", name);
- return 0;
+ static const char c = '\0';
+ processor(&c, 0, name, arg);
+ CloseHandle(f);
+ return 1;
}
m = CreateFileMapping(f, NULL, PAGE_READONLY, 0, 0, NULL);
if (m == NULL) {
- fprintf(stderr, "%s: CreateFileMapping failed\n", name);
+ win32perror(name);
CloseHandle(f);
return 0;
}
p = MapViewOfFile(m, FILE_MAP_READ, 0, 0, 0);
if (p == NULL) {
+ win32perror(name);
CloseHandle(m);
CloseHandle(f);
- fprintf(stderr, "%s: MapViewOfFile failed\n", name);
return 0;
}
processor(p, size, name, arg);
CloseHandle(f);
return 1;
}
+
+/* Print "<s>: <system message>" on stderr for the calling thread's last
+   Win32 error (GetLastError).  If no message text can be obtained, prints
+   "<s>: unknown Windows error" instead. */
+static
+void win32perror(const char *s)
+{
+  LPVOID buf;
+  /* No argument array is supplied, so insert sequences such as %1 in the
+     system message must be left alone: FORMAT_MESSAGE_IGNORE_INSERTS. */
+  if (FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM
+                    | FORMAT_MESSAGE_IGNORE_INSERTS,
+                    NULL,
+                    GetLastError(),
+                    MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+                    (LPTSTR) &buf,
+                    0,
+                    NULL)) {
+    fprintf(stderr, "%s: %s", s, buf);
+    fflush(stderr);
+    /* FORMAT_MESSAGE_ALLOCATE_BUFFER hands us a buffer to release. */
+    LocalFree(buf);
+  }
+  else
+    fprintf(stderr, "%s: unknown Windows error\n", s);
+}
#include <stdio.h>
+#include <string.h>
#include "wfcheck.h"
#include "filemap.h"
+struct ProcessFileArg {
+ enum EntityType entityType;
+ int result;
+};
+
static
-void processFile(const void *data, size_t size, const char *filename, void *arg)
+void processFile(const void *data, size_t size, const char *filename, void *p)
{
const char *badPtr = 0;
unsigned long badLine = 0;
unsigned long badCol = 0;
- int *ret = arg;
+ struct ProcessFileArg *arg = p;
enum WfCheckResult result;
- result = wfCheck(data, size, &badPtr, &badLine, &badCol);
+ result = wfCheck(arg->entityType, data, size, &badPtr, &badLine, &badCol);
if (result) {
- static const char *message[] = {
- 0,
- "out of memory",
- "syntax error",
- "no element found",
- "invalid token",
- "unclosed token",
- "unclosed token",
- "mismatched tag",
- "duplicate attribute",
- "junk after document element",
- };
- fprintf(stderr, "%s:", filename);
+ const char *msg = wfCheckMessage(result);
+ fprintf(stdout, "%s:", filename);
if (badPtr != 0)
- fprintf(stderr, "%lu:%lu:", badLine+1, badCol);
- fprintf(stderr, "E: %s", message[result]);
- putc('\n', stderr);
- if (!*ret)
- *ret = 1;
+ fprintf(stdout, "%lu:%lu:", badLine+1, badCol);
+ fprintf(stdout, "E: %s", msg ? msg : "(unknown message)");
+ putc('\n', stdout);
+ arg->result = 1;
}
+ else
+ arg->result = 0;
}
-
int main(int argc, char **argv)
{
- int i;
+ int i = 1;
int ret = 0;
- if (argc == 1) {
- fprintf(stderr, "usage: %s filename ...\n", argv[0]);
+ struct ProcessFileArg arg;
+
+ arg.entityType = documentEntity;
+
+ if (i < argc && strcmp(argv[i], "-g") == 0) {
+ i++;
+ arg.entityType = generalTextEntity;
+ }
+ if (i < argc && strcmp(argv[i], "--") == 0)
+ i++;
+ if (i == argc) {
+ fprintf(stderr, "usage: %s [-g] filename ...\n", argv[0]);
return 1;
}
- for (i = 1; i < argc; i++) {
- if (!filemap(argv[i], processFile, &ret))
+ for (; i < argc; i++) {
+ if (!filemap(argv[i], processFile, &arg))
ret = 2;
+ else if (arg.result && !ret)
+ ret = 1;
}
return ret;
}