Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add context manager functionality using hooks #121

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 73 additions & 2 deletions lazy_loader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,30 @@
Makes it easy to load subpackages and functions on demand.
"""
import ast
import builtins
import importlib
import importlib.util
import inspect
import os
import sys
import types
import warnings
from contextvars import ContextVar
from importlib.abc import MetaPathFinder
from importlib.machinery import ModuleSpec
from typing import Sequence

__all__ = ["attach", "load", "attach_stub"]

inside_context_manager: ContextVar[bool] = ContextVar(
'inside_context_manager',
default=False,
)
searching_spec: ContextVar[bool] = ContextVar(
'searching_spec',
default=False,
)


def attach(package_name, submodules=None, submod_attrs=None):
"""Attach lazily loaded submodules, functions, or other attributes.
Expand Down Expand Up @@ -257,15 +271,72 @@ def attach_stub(package_name: str, filename: str):
incorrectly (e.g. if it contains an relative import from outside of the module)
"""
stubfile = (
filename if filename.endswith("i") else f"{os.path.splitext(filename)[0]}.pyi"
filename if filename.endswith("i")
else f"{os.path.splitext(filename)[0]}.pyi"
)

if not os.path.exists(stubfile):
raise ValueError(f"Cannot load imports from non-existent stub {stubfile!r}")
raise ValueError(
f"Cannot load imports from non-existent stub {stubfile!r}",
)

with open(stubfile) as f:
stub_node = ast.parse(f.read())

visitor = _StubVisitor()
visitor.visit(stub_node)
return attach(package_name, visitor._submodules, visitor._submod_attrs)


class LazyFinder(MetaPathFinder):

def find_spec(
self,
fullname: str,
path: Sequence[str] | None,
target: types.ModuleType | None = ...,
/ ,
) -> ModuleSpec | None:
if not inside_context_manager.get():
# We are not in context manager, delegate to normal import
return None

if searching_spec.get():
# We are searching for the loader, so we should continue the search
return None

searching_spec.set(True)
spec = importlib.util.find_spec(fullname)
searching_spec.set(False)

if spec is None:
raise ModuleNotFoundError(f"No module named '{fullname}'")

spec.loader = importlib.util.LazyLoader(spec.loader)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the lazy loader produces a delayed evaluation such that any import errors will show up later than expected, and since the user of the lazy_imports context may not realize what is happening under the hood, it might make sense to subclass the LazyLoader and do something like

class MyLazyLoader(LazyLoader):
    def exec_module(self, module):
        try:
            super().exec_module(module)
        except BaseException as e:
            raise ImportError(f"Lazy load of module {module} failed") from e

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I in principle agree with improving the user reporting of errors, so this sounds fine to me.


return spec


sys.meta_path.insert(0, LazyFinder())
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most meta path finders will be targeted at specific set of modules. In those cases it is probably fine to insert the finder like this because in general if there are multiple finders/loaders affecting the same set of modules and injecting specialized behavior there is no way to guarantee that they can both do so safely anyway. In this case, however, we need the LazyFinder to respect other finders. If a finder is inserted into the meta path after this finder then it could preempt the lazy find. To address that, instead of inserting the finder here we should insert it inside the __enter__ of the context manager and then remove it in __exit__ with something like

with threading.Lock():
    sys.meta_path = [f for f in sys.meta_path if not isinstance(finder, LazyFinder)]

We will need to lock meta_path modification (both insertion and deletion) for thread safety. Since the lazy_imports context is by construction not reentrant it should be fine to not include any logic to handle the object already being in the list.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for not answering earlier, as I was on vacation.

I know that it is possible for another hook to be inserted before this one after this is imported. However, that would only happen if it is explicitly inserted at the beginning of the list, which is not very common.

Although ensuring that we are at the first position on __enter__ seems sensible at first glance, in a multithread context this simply cannot be done safely, AFAIK, because hooks are global and not per-thread, so you would be messing with the meta path for other threads too (and other threads could change it too!).

Your code seems broken. Not only it assumes that all threads would synchronize the meta path using the same lock (impracticable), but if I am not mistaken you are creating a new lock every time, so you are not even protecting anything (see pylint-dev/pylint#5208).

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No problem!

Apologies, my example wasn't intended to be taken verbatim. Concretely, I would suggest storing a lock either at module scope or as a class member of the LazyFinder (e.g. upon first import of the lazy_loader itself). Then you would have a single instance that is shared across all threads and all import calls. I am confused as to why you think having all threads synchronizing the meta path via the same lock would be impractical. In general, thread-safe code requires that all threads would use the same lock, otherwise the lock serves no purpose. Perhaps I am misunderstanding what you mean? Are you concerned that imports might be occurring in a performance-sensitive context? My assumption is that even if you have dozens/hundreds/more threads, the import shouldn't be on the critical path for performance.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, what I mean is that you cannot control what other threads do. In a multithreading context some threads may be created by other libraries or the application. These could in principle access the list while you are checking/changing it, and they won't use your lock. You can only protect by locking the APIs that you define, not the APIs defined elsewhere, unless you are in complete control of all threads of the process.

Copy link

@vyasr vyasr Aug 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, if any other library includes modification of sys.meta_path beyond simply usage of this context manager in different threads then I agree that we have no control over that. There is nothing that we can do in that case. The question to me is really which case has more risk; allowing the lazy finder to be preempted by different insertion order, or allowing the lazy finder to have possible nondeterministic behavior in a multithreaded scenario where multiple distinct code paths modify sys.meta_path. I was assuming the latter would be far less likely than the former (granting that the former is also quite unlikely) and was largely operating under the assumption that the lazy loader would be the only code modifying sys.meta_path in a multithreaded context. OTOH nondeterministic code is a far greater evil and I wasn't really thinking much about it. I'm fine with leaving the implementation as is and just accepting the limitation.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I think we could do mostly safely is to warn if we are not in first position, possibly even listing which other hooks are before ours, so that users are aware of that and do not spend hours to debug it.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, worst case with the warning is that we report a false positive if some other libraries is inserted and then removed with bad timing in another thread.



class lazy_imports:
"""
Context manager that will block imports and make them lazy.

>>> import lazy_loader
>>> with lazy_loader.lazy_imports():
>>> from ._mod import some_func

"""

def __enter__(self):
# Prevent normal importing
if inside_context_manager.get():
raise ValueError("Nested lazy_imports not allowed.")
inside_context_manager.set(True)
return self

def __exit__(self, type, value, tb):
# Restore normal importing
inside_context_manager.set(False)
5 changes: 5 additions & 0 deletions lazy_loader/tests/fake_pkg_magic/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import lazy_loader as lazy

with lazy.lazy_imports():
from .some_func import some_func
from . import some_mod, nested_pkg
6 changes: 6 additions & 0 deletions lazy_loader/tests/fake_pkg_magic/nested_pkg/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import lazy_loader as lazy

from . import nested_mod_eager

with lazy.lazy_imports():
from . import nested_mod_lazy
Empty file.
Empty file.
3 changes: 3 additions & 0 deletions lazy_loader/tests/fake_pkg_magic/some_func.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
def some_func():
"""Function with same name as submodule."""
pass
2 changes: 2 additions & 0 deletions lazy_loader/tests/fake_pkg_magic/some_mod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class SomeClass:
pass
75 changes: 75 additions & 0 deletions lazy_loader/tests/test_lazy_loader_magic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import importlib
import sys
import types

import lazy_loader as lazy
import pytest


def test_lazy_import_basics():
with lazy.lazy_imports():
import math

with pytest.raises(ImportError):
with lazy.lazy_imports():
import anything_not_real

# Now test that accessing attributes does what it should
assert math.sin(math.pi) == pytest.approx(0, 1e-6)


def test_lazy_import_subpackages():
with lazy.lazy_imports():
import html.parser as hp
assert "html" in sys.modules
assert type(sys.modules["html"]) == type(pytest)
assert isinstance(hp, importlib.util._LazyModule)
assert "html.parser" in sys.modules
assert sys.modules["html.parser"] == hp


def test_lazy_import_impact_on_sys_modules():
with lazy.lazy_imports():
import math

with pytest.raises(ImportError):
with lazy.lazy_imports():
import anything_not_real

assert isinstance(math, types.ModuleType)
assert "math" in sys.modules
assert "anything_not_real" not in sys.modules

# only do this if numpy is installed
pytest.importorskip("numpy")
with lazy.lazy_imports():
import numpy as np
assert isinstance(np, types.ModuleType)
assert "numpy" in sys.modules

np.pi # trigger load of numpy

assert isinstance(np, types.ModuleType)
assert "numpy" in sys.modules


def test_lazy_import_nonbuiltins():
with lazy.lazy_imports():
import numpy as np
import scipy as sp

assert np.sin(np.pi) == pytest.approx(0, 1e-6)


def test_attach_same_module_and_attr_name():
from lazy_loader.tests import fake_pkg_magic

# Grab attribute twice, to ensure that importing it does not
# override function by module
assert isinstance(fake_pkg_magic.some_func, types.FunctionType)
assert isinstance(fake_pkg_magic.some_func, types.FunctionType)

# Ensure imports from submodule still work
from lazy_loader.tests.fake_pkg_magic.some_func import some_func

assert isinstance(some_func, types.FunctionType)
Loading