[reportlab-users] new pyRXP

Stuart Bishop reportlab-users@reportlab.com
Fri, 11 Apr 2003 07:28:26 +1000


--Apple-Mail-4-835946544
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
	charset=US-ASCII;
	format=flowed


On Thursday, April 10, 2003, at 09:56  PM, Robin Becker wrote:

>>>> import pyRXPU
>>>> pyRXPU.Parser()(open('001.xml','r').read())
> Traceback (most recent call last):
>   File "<interactive input>", line 1, in ?
> Error: Error: EOE in comment in entity "e" defined at line 2 char 1 of
>  file:///C:/Python/reportlab/rl_addons/pyRXP/test/sb/001.ent
>  in unnamed entity at line 3 char 4 of 
> file:///C:/Python/reportlab/rl_ad
> dons/pyRXP/test/sb/001.ent
> EOE in commentParse Failed!
>>>>

That is the correct output (it is not well formed XML). However, when I
run it under OS X it reports malloc errors.

> I certainly don't see any crashes though. I'm using Python 2.2 under
> win32.

OS X just spews out lots of warnings, and segfaults if the test is run with
the -v flag:
.*** malloc[12626]: Deallocation of a pointer not malloced: 0x7478e2; 
This could be a double free(), or free() called with the middle of an 
allocated block; Try setting environment variable MallocHelp to see 
tools to help debug

Red Hat 7.2 segfaults.

The Win32 malloc routines might be more forgiving.

Here is the stack trace from OS X. I may be able to trace this through
with a debugger later today:
#0  0x900042e0 in free_list_remove_ptr ()
#1  0x90003f60 in szone_free ()
#2  0x00620750 in Fclose (file=0x720150) at build/_pyRXPU/stdio16U.c:427
#3  0x0061ec60 in SourceClose (source=0x7234e0) at 
build/_pyRXPU/inputU.c:130
#4  0x00618240 in FreeParser (p=0x4568f0) at 
build/_pyRXPU/xmlparserU.c:657
#5  0x00624a14 in pyRXPParser_parse (xself=0x39b010, args=0x62e410, 
kw=0x7234e0) at build/_pyRXPU/pyRXPU.c:769

Here is a version of pyRXP.c that causes everything to use Python's
memory routines instead of malloc/free. To use it you need to remove
the reference to 'system.c' in setup.py:


--Apple-Mail-4-835946544
Content-Disposition: attachment;
	filename=test_xmltestsuite.py
Content-Transfer-Encoding: 7bit
Content-Type: application/octet-stream;
	x-unix-mode=0644;
	name="test_xmltestsuite.py"

#!/usr/bin/env python
'''
$Id: test_xmltestsuite.py,v 1.1 2003/02/08 16:35:39 zen Exp $
Test parsing and validation against James Clark's test cases,
as downloaded from http://www.jclark.com/xml/
The .zip file should be in the same directory as this script.
Note that the .zip file can be freely distributed in unmodified form
so it could be added to the pyRXP distribution.
'''

# Revision-control keyword strings (presumably expanded by RCS/CVS on checkin)
__rcs_id__  = '$Id: test_xmltestsuite.py,v 1.1 2003/02/08 16:35:39 zen Exp $'
# Slice the bare revision number ('1.1') out of the '$Revision: ... $' keyword
__version__ = '$Revision: 1.1 $'[11:-2]
__author__ = 'Stuart Bishop <stuart@stuartbishop.net>'

# Set to a true value to trace each parse on stderr, sleep one second after
# every parsed file, and prompt for Enter around the test run.
debug = 0

import unittest
import zipfile
import sys
import os
import os.path
import codecs
from glob import glob

# Let a module built under ../build be imported without installing it
for builddir in glob('../build/*'):
    sys.path.append(builddir)
sys.stderr.write('Path is %r' % sys.path)
sys.stderr.write('\n')

import pyRXPU

# The debug flag exists to help trace down memory bugs; time is only
# needed for the pauses it inserts between parses.
if debug:
    import time

# 2.2 compatibility - sort of: supply a stand-in when the interpreter
# has not defined __file__ for this module.
try:
    __file__
except NameError:
    __file__ = os.path.join(os.getcwd(),'oops')

class test_pyRXPU(unittest.TestCase):
    '''
    Run the pyRXPU parser over XML test files.

    The class itself only holds helpers; the actual test_* methods are
    attached at import time by buildup_test(), one per input file.
    '''
    # Parser module under test
    mod = pyRXPU

    def parse(self,filename,**kw):
        '''
        Parse the named file and return whatever the parser returns.

        Extra keyword arguments are passed to the Parser constructor.
        ReturnComments, ExpandEmpty and ReturnProcessingInstructions are
        always switched on. Parser errors propagate to the caller.
        '''
        if debug: sys.stderr.write('About to parse %s\n' % filename)
        # **kw is already a private dict, so it can be updated in place
        kw['ReturnComments'] = 1
        kw['ExpandEmpty'] = 1
        kw['ReturnProcessingInstructions'] = 1
        parser = self.mod.Parser(**kw)
        # Change directory in case we are loading entities from cwd
        retdir = os.getcwd()
        d,n = os.path.split(filename)
        if d:
            # os.chdir('') raises, so a bare file name stays in the cwd
            os.chdir(d)
        try:
            f = open(n)
            try:
                xml = f.read()
            finally:
                f.close()
            return parser.parse(xml)
        finally:
            # Always restore the working directory, even on a parse error
            os.chdir(retdir)
            if debug:
                sys.stderr.write('Done parsing   %s\n' % filename)
                sys.stderr.write('='*60 + '\n')
                time.sleep(1)

    def getcanonical(self,filename):
        ''' Parse in the named file, and return it as canonical XML '''
        return self._getcan(self.parse(filename))

    def _getcan(self,node):
        '''
        Serialise a parsed node, and recursively its children, as a string.

        A node is either a text string or a (tag,attrs,kids,junk) tuple.
        '''
        if isinstance(node,(type(''),type(u''))):
            return self._quote(node)

        tag,attrs,kids,junk = node

        # Comments and processing instructions are emitted unquoted
        if tag == self.mod.commentTagName:
            return u'<!--%s-->' % (kids[0],)
        elif tag == self.mod.piTagName:
            return u'<?%s %s?>' % (attrs['name'],kids[0])

        if attrs is None:
            attrs = ''
        else:
            keys = list(attrs.keys())
            keys.sort() # Attributes in lexical order
            attrs = ' '.join(
                ['%s="%s"' % (k,self._quote(attrs[k])) for k in keys]
                )
            if attrs:
                attrs = ' ' + attrs

        text = ''.join([self._getcan(kid) for kid in kids])

        return '<%s%s>%s</%s>' % (tag,attrs,text,tag)

    def _quote(self,txt):
        '''
        Return txt with &, <, >, ", tab, LF and CR replaced by references.
        '''
        # '&' has to go first or it would re-escape the references below
        txt = txt.replace('&','&amp;')
        txt = txt.replace('<','&lt;')
        txt = txt.replace('>','&gt;')
        txt = txt.replace('"','&quot;')
        txt = txt.replace('\x09','&#9;')
        txt = txt.replace('\x0a','&#10;')
        txt = txt.replace('\x0d','&#13;')
        return txt

    def _test_valid(self,inname,outname):
        '''
        Assert that inname, parsed and serialised by getcanonical(),
        equals the UTF-8 text stored in outname.
        '''
        inxml = self.getcanonical(inname)
        f = codecs.open(outname,mode='r',encoding='utf8')
        try:
            outxml = f.read()
        finally:
            f.close()
        self.assertEqual(inxml,outxml)

    def _test_invalid_parse(self,inname):
        ''' Fail if inname cannot be parsed with validation turned off '''
        try:
            self.parse(inname,Validate=0)
        except self.mod.error:
            # sys.exc_info() keeps this working on interpreters without
            # 'except ... as'; include the parser's message in the report
            self.fail(
                'Failed to parse %r in non-validating mode: %s'
                % (inname,sys.exc_info()[1])
                )

    def _test_invalid_validate(self,inname):
        ''' Fail unless parsing inname with validation on raises an error '''
        try:
            self.parse(inname,Validate=1)
        except self.mod.error:
            pass
        else:
            self.fail('Failed to detect validity error in %r' % inname)

    def _test_notwf(self,inname):
        ''' Fail unless parsing inname raises an error without validation '''
        try:
            self.parse(inname,Validate=0)
        except self.mod.error:
            pass
        else:
            self.fail(
                'Failed to detect that %r was not well formed' % inname
                )

def buildup_test(cls=test_pyRXPU):
    '''
    Add test methods to the TestCase

    Unpacks xmltest.zip (expected next to this script) into the script's
    directory, skipping files that already exist, and sorts the .xml
    inputs into cls.valid, cls.invalid and cls.notwf according to their
    path. One test method per input is then attached to cls:

        test_Valid_<dir>_<nnn>          compare against the 'out' file
        test_InvalidParse_<nnn>         must parse without validation
        test_InvalidValidate_<nnn>      must fail with validation
        test_NotWellFormed_<dir>_<nnn>  must fail without validation
    '''
    cls.valid = []
    cls.invalid = []
    cls.notwf = []
    testdir = os.path.dirname(__file__)
    zipf = zipfile.ZipFile(os.path.join(testdir,'xmltest.zip'))
    try:
        for zipname in zipf.namelist():

            # Archives may list directories as entries ending in '/';
            # they carry no data and cannot be opened as files.
            if zipname[-1:] == '/':
                continue

            # Extract the files if they don't already exist
            osname = os.path.join(*zipname.split('/')) # For non-unixes
            osname = os.path.join(testdir,osname)
            dirname = os.path.dirname(osname)
            # dirname is empty for a file in the cwd; makedirs('') raises
            if dirname and not os.path.isdir(dirname):
                os.makedirs(dirname)
            if not os.path.isfile(osname):
                f = open(osname,'wb')
                try:
                    f.write(zipf.read(zipname))
                finally:
                    f.close()

            # Add input files to our lists. 'invalid' has to be checked
            # before 'valid' because the latter is a substring of it.
            if os.path.splitext(osname)[1] == '.xml' \
                    and zipname.find('out') == -1:
                if zipname.find('invalid') != -1:
                    cls.invalid.append(osname)
                elif zipname.find('not-wf') != -1:
                    cls.notwf.append(osname)
                elif zipname.find('valid') != -1:
                    outname = os.path.join(
                        dirname,'out',os.path.basename(osname)
                        )
                    cls.valid.append( (osname,outname) )
    finally:
        zipf.close()

    # The file names are bound as default arguments below so that each
    # generated method keeps its own value rather than the loop's last one.

    # Add 'valid' tests
    for inname,outname in cls.valid:
        num = int(os.path.splitext(os.path.basename(inname))[0])
        subdir = os.path.split(os.path.split(inname)[0])[1]
        mname = 'test_Valid_%s_%03d' % (subdir,num)
        def doTest(self,inname=inname,outname=outname):
            self._test_valid(inname,outname)
        setattr(cls,mname,doTest)

    # Add 'invalid' tests
    for inname in cls.invalid:
        num = int(os.path.splitext(os.path.basename(inname))[0])
        mname = 'test_InvalidParse_%03d' % (num)
        def doTest(self,inname=inname):
            self._test_invalid_parse(inname)
        setattr(cls,mname,doTest)
        mname = 'test_InvalidValidate_%03d' % (num)
        def doTest(self,inname=inname):
            self._test_invalid_validate(inname)
        setattr(cls,mname,doTest)

    # Add 'not wellformed' tests
    for inname in cls.notwf:
        num = int(os.path.splitext(os.path.basename(inname))[0])
        subdir = os.path.split(os.path.split(inname)[0])[1]
        mname = 'test_NotWellFormed_%s_%03d' % (subdir,num)
        def doTest(self,inname=inname):
            self._test_notwf(inname)
        setattr(cls,mname,doTest)
        
# Attach the generated test methods before unittest looks for them
buildup_test()

if __name__ == '__main__':
    if debug: raw_input('Enter to start')
    # unittest.main() finishes by raising SystemExit, so the closing
    # prompt must sit in a finally clause or it is never reached.
    try:
        unittest.main()
    finally:
        if debug: raw_input('Enter to end')

--Apple-Mail-4-835946544
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
	charset=US-ASCII;
	format=flowed



The Python docs say this is a good idea as it helps Python manage memory
better. I have no idea if this causes any speed or memory hits.

-- 
Stuart Bishop <zen@shangri-la.dropbear.id.au>
http://shangri-la.dropbear.id.au/


--Apple-Mail-4-835946544--