The parsexml module is used here to extract the links from an HTML document. I use the
www.oxygenbasic.org page for this test.
# Example program to show the new parsexml module
# This program reads an HTML file and writes all its used links to stdout.
# Parse errors are printed and then skipped; whitespace is ignored.
import os, streams, parsexml, strutils
proc `=?=` (a, b: string): bool =
  ## Case-insensitive equality operator for strings.
  ##
  ## Returns true when ``cmpIgnoreCase(a, b)`` reports no difference between
  ## ``a`` and ``b``, false otherwise.
  # little trick: define our own comparator that ignores case
  cmpIgnoreCase(a, b) == 0
# Script body: read the HTML file named on the command line, print every
# ``<a href=...>`` link together with the text that follows the opening tag,
# then print the number of links found.
if paramCount() < 1:
  quit("Usage: htmlrefs filename[.html]")

var links = 0 # count the number of links
let filename = addFileExt(paramStr(1), "html")
let s = newFileStream(filename, fmRead)
if s == nil: quit("cannot open the file " & filename)

var x: XmlParser
open(x, s, filename)
next(x) # get first event

# true while the most recently opened tag (one that carries attributes) is an
# ``a`` element; lets ``href`` be found at any position in the attribute list
var inAnchor = false

block mainLoop:
  while true:
    case x.kind
    of xmlElementOpen:
      # the <a href = "xyz"> tag we are interested in always has an attribute,
      # thus we search for ``xmlElementOpen`` and not for ``xmlElementStart``
      inAnchor = (x.elementName =?= "a")
      x.next()
    of xmlAttribute:
      if inAnchor and x.attrKey =?= "href":
        let link = x.attrValue
        inc(links)
        inAnchor = false # at most one link is reported per tag
        # skip until we have an ``xmlElementClose`` event
        while true:
          x.next()
          if x.kind == xmlEof: break mainLoop
          if x.kind == xmlElementClose: break
        x.next() # skip ``xmlElementClose``
        # now we have the description for the ``a`` element
        var desc = ""
        while x.kind == xmlCharData:
          desc.add(x.charData)
          x.next()
        echo(desc & ": " & link)
      else:
        x.next()
    of xmlEof: break # end of file reached
    of xmlError:
      # report the problem, then carry on with the next event
      echo(errorMsg(x))
      x.next()
    else: x.next() # skip other events

echo($links & " link(s) found!")
x.close()
jrs@laptop:~/nimrod/examples$ nimrod c -d:release htmlrefs.nim
config/nimrod.cfg(36, 11) Hint: added path: '/home/jrs/.babel/libs/' [Path]
Hint: used config file '/home/jrs/nimrod/config/nimrod.cfg' [Conf]
Hint: system [Processing]
Hint: htmlrefs [Processing]
Hint: os [Processing]
Hint: strutils [Processing]
Hint: parseutils [Processing]
Hint: times [Processing]
Hint: posix [Processing]
Hint: streams [Processing]
Hint: parsexml [Processing]
Hint: hashes [Processing]
Hint: lexbase [Processing]
Hint: unicode [Processing]
gcc -c -w -O3 -fno-strict-aliasing -I/home/jrs/nimrod/lib -o examples/nimcache/htmlrefs.o examples/nimcache/htmlrefs.c
gcc -c -w -O3 -fno-strict-aliasing -I/home/jrs/nimrod/lib -o examples/nimcache/system.o examples/nimcache/system.c
gcc -c -w -O3 -fno-strict-aliasing -I/home/jrs/nimrod/lib -o examples/nimcache/os.o examples/nimcache/os.c
gcc -c -w -O3 -fno-strict-aliasing -I/home/jrs/nimrod/lib -o examples/nimcache/strutils.o examples/nimcache/strutils.c
gcc -c -w -O3 -fno-strict-aliasing -I/home/jrs/nimrod/lib -o examples/nimcache/streams.o examples/nimcache/streams.c
gcc -c -w -O3 -fno-strict-aliasing -I/home/jrs/nimrod/lib -o examples/nimcache/parsexml.o examples/nimcache/parsexml.c
gcc -c -w -O3 -fno-strict-aliasing -I/home/jrs/nimrod/lib -o examples/nimcache/hashes.o examples/nimcache/hashes.c
gcc -c -w -O3 -fno-strict-aliasing -I/home/jrs/nimrod/lib -o examples/nimcache/lexbase.o examples/nimcache/lexbase.c
gcc -c -w -O3 -fno-strict-aliasing -I/home/jrs/nimrod/lib -o examples/nimcache/unicode.o examples/nimcache/unicode.c
gcc -o /home/jrs/nimrod/examples/htmlrefs examples/nimcache/unicode.o examples/nimcache/lexbase.o examples/nimcache/hashes.o examples/nimcache/parsexml.o examples/nimcache/streams.o examples/nimcache/posix.o examples/nimcache/times.o examples/nimcache/parseutils.o examples/nimcache/strutils.o examples/nimcache/os.o examples/nimcache/system.o examples/nimcache/htmlrefs.o -ldl
Hint: operation successful (16742 lines compiled; 2.079 sec total; 19.199MB) [SuccessX]
jrs@laptop:~/nimrod/examples$ ./htmlrefs o2.html
o2.html(17, 13) Error: '"' or "'" expected
o2.html(21, 11) Error: '"' or "'" expected
o2.html(24, 24) Error: '"' or "'" expected
o2.html(26, 35) Error: '"' or "'" expected
o2.html(30, 42) Error: '"' or "'" expected
Alpha Downloads:
http://www.oxygenbasic.org/downloads.htmGames:
http://www.oxygenbasic.org/games.htmReference:
http://www.oxygenbasic.org/reference.htmForum:
http://www.oxygenbasic.org/forum/Wiki:
http://www.oxygenbasic.org/wiki/index.php/Main_Page5 link(s) found!
jrs@laptop:~/nimrod/examples$