Lazy Import

As Richard Tew mentioned on his blog, we are using the lazy importing trick to reduce memory overhead.

We improved the original module by adding some features:

  • A simpler and more robust injection mechanism using gc.get_referrers()
  • Supporting zip imports
  • Supporting reload()
  • A reporting capability.
  • Supporting bypassing selected modules or packages.

The last bit is important because some modules may be used internally by C and those cannot be treated with this.  In our case, we actually import this module from C right after importing site.py, in order to get maximum benefit, so we may be seeing more problems than the casual user who imports it from a .py file.

I don’t have any other good place to put this, so I’m just leaving it here for the time being.

[python]
# Copyright rPath, Inc., 2006
# Available under the python license
“”” Defines an on-demand importer that only actually loads modules when their
attributes are accessed. NOTE: if the ondemand module is viewed using
introspection, like dir(), isinstance, etc, it will appear as a
ModuleProxy, not a module, and will not have the correct attributes.
Barring introspection, however, the module will behave as normal.
“””

# modified for CCP by Kristján Valur Jónsson:
# – Use the gc.getreferrers() method to replace module references
# – Add zip support
# – support reload()
# – Add reporting and memory analysis
# – Add bypass mechanism for modules where this causes problems

import sys
import imp
import gc
import __builtin__
import zipimport

memory_query_func = None #set this to something returning memory use
verbose = False

ModuleType = type(sys)

#modules that bypass this mechanism
ignorenames = set() #module names
ignorepkg = set() #package names
ignorepath = set() #paths to ignore

#side effect register string unicode handling / conversion
ignorenames |= set([“encodings”])
#side effect prevent internal Python borrowed reference choking
ignorenames |= set([“warnings”])

#statistics
proxies = set()
proxyTally = 0
reals = set()
ignored = set()
existing = set(k for k,v in sys.modules.iteritems() if v)

def report(arg=””):
if not verbose:
return
loaded = arg.startswith(“load “)
if loaded:
if memory_query_func is not None:
print >> sys.stderr, “lazyimport: %s (now using %0.3f Mb)” % (arg, memory_query_func())
else:
print >> sys.stderr, “lazyimport: %s” % arg
else:
if memory_query_func is not None:
print >> sys.stderr, “lazyimport report: %s (now using %0.3f Mb)” % (arg, memory_query_func())
else:
print >> sys.stderr, “lazyimport report: %s” % arg

if verbose > 1 or not loaded:
print >> sys.stderr, “proxy imported %d %r”%(len(proxies), sorted(proxies))
print >> sys.stderr, “proxy imported (maximum size reached) %d” % proxyTally
print >> sys.stderr, “fully imported (pre lazyimport) %d %r”%(len(existing), sorted(existing))
print >> sys.stderr, “fully imported (via lazyimport) %d %r”%(len(reals), sorted(reals))
print >> sys.stderr, “fully imported (via allowed bypass) %d %r”%(len(ignored), sorted(ignored))

modules = set(k for k,v in sys.modules.iteritems() if v)
diff = modules-reals-proxies-ignored-existing
print >> sys.stderr, “fully imported (lost track of these) %d %r”%(len(diff), sorted(diff))

builtins = set(sys.builtin_module_names)
diff = builtins & proxies
print >> sys.stderr, “builtins (proxied) %d %r” % (len(diff), diff)
diff = builtins & (reals | existing)
print >> sys.stderr, “builtins (fully imported) %d %r” % (len(diff), diff)
diff = builtins – proxies – reals – existing
print >> sys.stderr, “builtins (not imported) %d %r” % (len(diff), diff)

def loadModule(proxy, name, loader):
#see if the module is already loaded
mod = sys.modules.get(name, None)
#avoid isinstace on mod, because it will cause __class__ lookup and this
#causes recursion
if mod is not proxy and isinstance(mod, ModuleType):
return mod

#load the module
mod = loader.load_module(name)
replaceModule(proxy, mod)

reals.add(name)
try:
proxies.remove(name)
except KeyError:
pass
report(“load “+name)
return mod

def replaceModule(proxy, mod):
“”” Find all dicts where proxy is, and replace it with the actual module.
Typcially, this is the sys.modules and any module dicts.
“””
for e in gc.get_referrers(proxy):
if isinstance(e, dict):
for k, v in e.iteritems():
if v is proxy:
e[k] = mod

class ModuleProxy(object):
def __init__(self, name, loader):
global proxyTally
object.__setattr__(self, “_args”, (name, loader))
proxies.add(name)
proxyTally += 1
#report(“proxy “+name)

# we don’t add any docs for the module in case the
# user tries accessing ‘__doc__’
def __getattribute__(self, key):
if key in [“_args”]:
return object.__getattribute__(self, key)
mod = loadModule(self, *self._args)
return getattr(mod, key)

def __setattr__(self, key, value):
mod = loadModule(self, *self._args)
setattr(mod, key, value)

def __dir__(self):
#modules have special dir handling, invoke that.
return dir(loadModule(self, *self._args))

def __repr__(self):
return “” %(self._args,)

class StandardLoader(object):
“”” A class that wraps the standard imp.load_module into
the new style object hook api, for consistency here
“””
def __init__(self, pathname, desc):
self.pathname, self.desc = pathname, desc

def __repr__(self):
return “” %(self.pathname, self.desc)

def load_module(self, fullname):
try:
f = open(self.pathname, ‘U’)
except:
f = None
try:
return imp.load_module(fullname, f, self.pathname, self.desc)
finally:
if f:
f.close()

class OnDemandLoader(object):
“”” The loader takes a name and real loader of the module to load and
“loads” it – in this case returning loading a proxy that
will only load the class when an attribute is accessed.
“””
def __init__(self, real_loader):
self.real_loader = real_loader

def load_module(self, fullname):
mod = sys.modules.get(fullname)
if not mod:
mod = ModuleProxy(fullname, self.real_loader)
sys.modules[fullname] = mod
return mod

class OnDemandImporter(object):
“”” The on-demand importer imports a module proxy that
inserts the desired module into the calling scope only when
an attribute from the module is actually used.
“””
def find_module(self, fullname, path=None):
if path:
#only bother with sub-modules if they are being loaded
#correctly, i.e. the parent module is already in sys.modules
head, tail = fullname.rsplit(‘.’, 1)
if not sys.modules.get(head):
return None
else:
tail = fullname

# See if the module can be found. It might be trying a relative
# import for example, so often modules are not found.
try:
f, pathname, desc = imp.find_module(tail, path)
if f:
f.close()
except ImportError:
return None #no zip found either

#Now, ignore some modules that we don’t want
#Since this is the meta_path, we just pass it on to the
#rest of the machinery, i.e. pretend not to have found it.
if ignore_module(fullname, pathname):
return None

#Ok, we are going to load this lazily
real_loader = StandardLoader(pathname, desc)
return OnDemandLoader(real_loader)

class OnDemandZipImporter(object):
def __init__(self, path):
importer = zipimport.zipimporter(path)
self.real_importer = importer
self.is_package = importer.is_package
self.get_code = importer.get_code
self.get_source = importer.get_source
self.get_data = importer.get_data
self.get_filename = importer.get_filename

def find_module(self, fullname, path=None):
result = self.real_importer.find_module(fullname, path)
if result is None:
return None
return self

def load_module(self, fullname):
if ignore_module(fullname, self.real_importer.archive):
return self.real_importer.load_module(fullname)

mod = sys.modules.get(fullname)
if not mod:
mod = ModuleProxy(fullname, self.real_importer)
sys.modules[fullname] = mod
return mod

onDemandImporter = OnDemandImporter()
RealReload = reload
def LazyReload(module):
if type(module) is ModuleType:
return RealReload(module)

def install():
if onDemandImporter not in sys.meta_path:
sys.meta_path.append(onDemandImporter)
try:
idx = sys.path_hooks.index(zipimport.zipimporter)
sys.path_hooks[idx] = OnDemandZipImporter
except ValueError:
pass

__builtin__.reload = LazyReload

def uninstall():
try:
sys.meta_path.remove(onDemandImporter)
try:
idx = sys.path_hooks.index(OnDemandZipImporter)
sys.path_hooks[idx] = zipimport.zipimporter
except ValueError:
pass
except ValueError:
return
__builtin__.reload = RealReload

def ignore_module(fullname, pathname=None):
“””
See if we want to ignore demand-loading of this module for any reason
“””
ignore = False
if fullname in ignorenames:
ignore = True
for pkg in ignorepkg:
if fullname.startswith(pkg):
ignore = True
if pathname:
for path in ignorepath:
if path in pathname.lower():
ignore = True
if ignore:
ignored.add(fullname)
return ignore

[/python]

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s