893 lines
31 KiB
Rust
893 lines
31 KiB
Rust
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
|
|
/*!
Functionality for a Python importer.

This module defines a Python meta path importer and associated functionality
for importing Python modules from memory.
*/
|
|
|
|
use std::cell::RefCell;
|
|
use std::collections::{HashMap, HashSet};
|
|
use std::ffi::CStr;
|
|
use std::io::Cursor;
|
|
use std::sync::Arc;
|
|
|
|
use byteorder::{LittleEndian, ReadBytesExt};
|
|
use cpython::exc::{FileNotFoundError, ImportError, RuntimeError, ValueError};
|
|
use cpython::{
|
|
py_class, py_class_impl, py_coerce_item, py_fn, NoArgs, ObjectProtocol, PyClone, PyDict, PyErr,
|
|
PyList, PyModule, PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
|
|
};
|
|
use python3_sys as pyffi;
|
|
use python3_sys::{PyBUF_READ, PyMemoryView_FromMemory};
|
|
|
|
use super::pyinterp::PYOXIDIZER_IMPORTER_NAME;
|
|
|
|
/// Obtain a Python memoryview referencing a memory slice.
|
|
///
|
|
/// New memoryview allows Python to access the underlying memory without
|
|
/// copying it.
|
|
#[inline]
|
|
fn get_memory_view(py: Python, data: &'static [u8]) -> Option<PyObject> {
|
|
let ptr = unsafe { PyMemoryView_FromMemory(data.as_ptr() as _, data.len() as _, PyBUF_READ) };
|
|
unsafe { PyObject::from_owned_ptr_opt(py, ptr) }
|
|
}
|
|
|
|
/// Holds pointers to Python module data in memory.
|
|
#[derive(Debug)]
|
|
struct PythonModuleData {
|
|
source: Option<&'static [u8]>,
|
|
bytecode: Option<&'static [u8]>,
|
|
}
|
|
|
|
impl PythonModuleData {
|
|
/// Obtain a PyMemoryView instance for source data.
|
|
fn get_source_memory_view(&self, py: Python) -> Option<PyObject> {
|
|
match self.source {
|
|
Some(data) => get_memory_view(py, data),
|
|
None => None,
|
|
}
|
|
}
|
|
|
|
/// Obtain a PyMemoryView instance for bytecode data.
|
|
fn get_bytecode_memory_view(&self, py: Python) -> Option<PyObject> {
|
|
match self.bytecode {
|
|
Some(data) => get_memory_view(py, data),
|
|
None => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Represents Python modules data in memory.
|
|
///
|
|
/// This is essentially an index over a raw backing blob.
|
|
struct PythonModulesData {
|
|
data: HashMap<&'static str, PythonModuleData>,
|
|
}
|
|
|
|
impl PythonModulesData {
|
|
/// Construct a new instance from a memory slice.
|
|
fn from(data: &'static [u8]) -> Result<PythonModulesData, &'static str> {
|
|
let mut reader = Cursor::new(data);
|
|
|
|
let count = reader
|
|
.read_u32::<LittleEndian>()
|
|
.or_else(|_| Err("failed reading count"))?;
|
|
|
|
let mut index = Vec::with_capacity(count as usize);
|
|
let mut total_names_length = 0;
|
|
let mut total_sources_length = 0;
|
|
|
|
for _ in 0..count {
|
|
let name_length = reader
|
|
.read_u32::<LittleEndian>()
|
|
.or_else(|_| Err("failed reading name length"))?
|
|
as usize;
|
|
let source_length = reader
|
|
.read_u32::<LittleEndian>()
|
|
.or_else(|_| Err("failed reading source length"))?
|
|
as usize;
|
|
let bytecode_length = reader
|
|
.read_u32::<LittleEndian>()
|
|
.or_else(|_| Err("failed reading bytecode length"))?
|
|
as usize;
|
|
|
|
index.push((name_length, source_length, bytecode_length));
|
|
total_names_length += name_length;
|
|
total_sources_length += source_length;
|
|
}
|
|
|
|
let mut res = HashMap::with_capacity(count as usize);
|
|
let sources_start_offset = reader.position() as usize + total_names_length;
|
|
let bytecodes_start_offset = sources_start_offset + total_sources_length;
|
|
|
|
let mut sources_current_offset: usize = 0;
|
|
let mut bytecodes_current_offset: usize = 0;
|
|
|
|
for (name_length, source_length, bytecode_length) in index {
|
|
let offset = reader.position() as usize;
|
|
|
|
let name =
|
|
unsafe { std::str::from_utf8_unchecked(&data[offset..offset + name_length]) };
|
|
|
|
let source_offset = sources_start_offset + sources_current_offset;
|
|
let source = if source_length > 0 {
|
|
Some(&data[source_offset..source_offset + source_length])
|
|
} else {
|
|
None
|
|
};
|
|
|
|
let bytecode_offset = bytecodes_start_offset + bytecodes_current_offset;
|
|
let bytecode = if bytecode_length > 0 {
|
|
Some(&data[bytecode_offset..bytecode_offset + bytecode_length])
|
|
} else {
|
|
None
|
|
};
|
|
|
|
reader.set_position(offset as u64 + name_length as u64);
|
|
|
|
sources_current_offset += source_length;
|
|
bytecodes_current_offset += bytecode_length;
|
|
|
|
res.insert(name, PythonModuleData { source, bytecode });
|
|
}
|
|
|
|
Ok(PythonModulesData { data: res })
|
|
}
|
|
}
|
|
|
|
/// Represents Python resources data in memory.
|
|
///
|
|
/// This is essentially an index over a raw backing blob.
|
|
struct PythonResourcesData {
|
|
packages: HashMap<&'static str, Arc<Box<HashMap<&'static str, &'static [u8]>>>>,
|
|
}
|
|
|
|
impl PythonResourcesData {
|
|
fn from(data: &'static [u8]) -> Result<PythonResourcesData, &'static str> {
|
|
let mut reader = Cursor::new(data);
|
|
|
|
let package_count = reader
|
|
.read_u32::<LittleEndian>()
|
|
.or_else(|_| Err("failed reading package count"))? as usize;
|
|
|
|
let mut index = Vec::with_capacity(package_count);
|
|
let mut total_names_length = 0;
|
|
|
|
for _ in 0..package_count {
|
|
let package_name_length = reader
|
|
.read_u32::<LittleEndian>()
|
|
.or_else(|_| Err("failed reading package name length"))?
|
|
as usize;
|
|
let resource_count = reader
|
|
.read_u32::<LittleEndian>()
|
|
.or_else(|_| Err("failed reading resource count"))?
|
|
as usize;
|
|
|
|
total_names_length += package_name_length;
|
|
|
|
let mut package_index = Vec::with_capacity(resource_count);
|
|
|
|
for _ in 0..resource_count {
|
|
let resource_name_length = reader
|
|
.read_u32::<LittleEndian>()
|
|
.or_else(|_| Err("failed reading resource name length"))?
|
|
as usize;
|
|
let resource_data_length = reader
|
|
.read_u32::<LittleEndian>()
|
|
.or_else(|_| Err("failed reading resource data length"))?
|
|
as usize;
|
|
|
|
total_names_length += resource_name_length;
|
|
|
|
package_index.push((resource_name_length, resource_data_length));
|
|
}
|
|
|
|
index.push((package_name_length, package_index));
|
|
}
|
|
|
|
let mut name_offset = reader.position() as usize;
|
|
let data_offset = name_offset + total_names_length;
|
|
let mut res = HashMap::new();
|
|
|
|
for (package_name_length, package_index) in index {
|
|
let package_name = unsafe {
|
|
std::str::from_utf8_unchecked(&data[name_offset..name_offset + package_name_length])
|
|
};
|
|
|
|
name_offset += package_name_length;
|
|
|
|
let mut package_data = Box::new(HashMap::new());
|
|
|
|
for (resource_name_length, resource_data_length) in package_index {
|
|
let resource_name = unsafe {
|
|
std::str::from_utf8_unchecked(
|
|
&data[name_offset..name_offset + resource_name_length],
|
|
)
|
|
};
|
|
|
|
name_offset += resource_name_length;
|
|
|
|
let resource_data = &data[data_offset..data_offset + resource_data_length];
|
|
|
|
package_data.insert(resource_name, resource_data);
|
|
}
|
|
|
|
res.insert(package_name, Arc::new(package_data));
|
|
}
|
|
|
|
Ok(PythonResourcesData { packages: res })
|
|
}
|
|
}
|
|
|
|
#[allow(unused_doc_comments)]
|
|
/// Python type to import modules.
|
|
///
|
|
/// This type implements the importlib.abc.MetaPathFinder interface for
|
|
/// finding/loading modules. It supports loading various flavors of modules,
|
|
/// allowing it to be the only registered sys.meta_path importer.
|
|
py_class!(class PyOxidizerFinder |py| {
|
|
data imp_module: PyModule;
|
|
data marshal_loads: PyObject;
|
|
data builtin_importer: PyObject;
|
|
data frozen_importer: PyObject;
|
|
data call_with_frames_removed: PyObject;
|
|
data module_spec_type: PyObject;
|
|
data decode_source: PyObject;
|
|
data exec_fn: PyObject;
|
|
data packages: HashSet<&'static str>;
|
|
data known_modules: KnownModules;
|
|
data resources: HashMap<&'static str, Arc<Box<HashMap<&'static str, &'static [u8]>>>>;
|
|
data resource_readers: RefCell<Box<HashMap<String, PyObject>>>;
|
|
|
|
// Start of importlib.abc.MetaPathFinder interface.
|
|
|
|
def find_spec(&self, fullname: &PyString, path: &PyObject, target: Option<PyObject> = None) -> PyResult<PyObject> {
|
|
let key = fullname.to_string(py)?;
|
|
|
|
if let Some(flavor) = self.known_modules(py).get(&*key) {
|
|
match flavor {
|
|
KnownModuleFlavor::Builtin => {
|
|
// BuiltinImporter.find_spec() always returns None if `path` is defined.
|
|
// And it doesn't use `target`. So don't proxy these values.
|
|
self.builtin_importer(py).call_method(py, "find_spec", (fullname,), None)
|
|
}
|
|
KnownModuleFlavor::Frozen => {
|
|
self.frozen_importer(py).call_method(py, "find_spec", (fullname, path, target), None)
|
|
}
|
|
KnownModuleFlavor::InMemory { .. } => {
|
|
let is_package = self.packages(py).contains(&*key);
|
|
|
|
// TODO consider setting origin and has_location so __file__ will be
|
|
// populated.
|
|
|
|
let kwargs = PyDict::new(py);
|
|
kwargs.set_item(py, "is_package", is_package)?;
|
|
|
|
self.module_spec_type(py).call(py, (fullname, self), Some(&kwargs))
|
|
}
|
|
}
|
|
} else {
|
|
Ok(py.None())
|
|
}
|
|
}
|
|
|
|
def find_module(&self, _fullname: &PyObject, _path: &PyObject) -> PyResult<PyObject> {
|
|
// Method is deprecated. Always returns None.
|
|
// We /could/ call find_spec(). Meh.
|
|
Ok(py.None())
|
|
}
|
|
|
|
def invalidate_caches(&self) -> PyResult<PyObject> {
|
|
Ok(py.None())
|
|
}
|
|
|
|
// End of importlib.abc.MetaPathFinder interface.
|
|
|
|
// Start of importlib.abc.Loader interface.
|
|
|
|
def create_module(&self, _spec: &PyObject) -> PyResult<PyObject> {
|
|
Ok(py.None())
|
|
}
|
|
|
|
def exec_module(&self, module: &PyObject) -> PyResult<PyObject> {
|
|
let name = module.getattr(py, "__name__")?;
|
|
let key = name.extract::<String>(py)?;
|
|
|
|
if let Some(flavor) = self.known_modules(py).get(&*key) {
|
|
match flavor {
|
|
KnownModuleFlavor::Builtin => {
|
|
self.builtin_importer(py).call_method(py, "exec_module", (module,), None)
|
|
},
|
|
KnownModuleFlavor::Frozen => {
|
|
self.frozen_importer(py).call_method(py, "exec_module", (module,), None)
|
|
},
|
|
KnownModuleFlavor::InMemory { module_data } => {
|
|
match module_data.get_bytecode_memory_view(py) {
|
|
Some(value) => {
|
|
let code = self.marshal_loads(py).call(py, (value,), None)?;
|
|
let exec_fn = self.exec_fn(py);
|
|
let dict = module.getattr(py, "__dict__")?;
|
|
|
|
self.call_with_frames_removed(py).call(py, (exec_fn, code, dict), None)
|
|
},
|
|
None => {
|
|
Err(PyErr::new::<ImportError, _>(py, ("cannot find code in memory", name)))
|
|
}
|
|
}
|
|
},
|
|
}
|
|
} else {
|
|
// Raising here might make more sense, as exec_module() shouldn't
|
|
// be called on the Loader that didn't create the module.
|
|
Ok(py.None())
|
|
}
|
|
}
|
|
|
|
// End of importlib.abc.Loader interface.
|
|
|
|
// Start of importlib.abc.InspectLoader interface.
|
|
|
|
def get_code(&self, fullname: &PyString) -> PyResult<PyObject> {
|
|
let key = fullname.to_string(py)?;
|
|
|
|
if let Some(flavor) = self.known_modules(py).get(&*key) {
|
|
match flavor {
|
|
KnownModuleFlavor::Frozen => {
|
|
let imp_module = self.imp_module(py);
|
|
|
|
imp_module.call(py, "get_frozen_object", (fullname,), None)
|
|
},
|
|
KnownModuleFlavor::InMemory { module_data } => {
|
|
match module_data.get_bytecode_memory_view(py) {
|
|
Some(value) => {
|
|
self.marshal_loads(py).call(py, (value,), None)
|
|
}
|
|
None => {
|
|
Err(PyErr::new::<ImportError, _>(py, ("cannot find code in memory", fullname)))
|
|
}
|
|
}
|
|
},
|
|
KnownModuleFlavor::Builtin => {
|
|
Ok(py.None())
|
|
}
|
|
}
|
|
} else {
|
|
Ok(py.None())
|
|
}
|
|
}
|
|
|
|
def get_source(&self, fullname: &PyString) -> PyResult<PyObject> {
|
|
let key = fullname.to_string(py)?;
|
|
|
|
if let Some(flavor) = self.known_modules(py).get(&*key) {
|
|
if let KnownModuleFlavor::InMemory { module_data } = flavor {
|
|
match module_data.get_source_memory_view(py) {
|
|
Some(value) => {
|
|
self.decode_source(py).call(py, (value,), None)
|
|
},
|
|
None => {
|
|
Err(PyErr::new::<ImportError, _>(py, ("source not available", fullname)))
|
|
}
|
|
}
|
|
} else {
|
|
Ok(py.None())
|
|
}
|
|
} else {
|
|
Ok(py.None())
|
|
}
|
|
}
|
|
|
|
// End of importlib.abc.InspectLoader interface.
|
|
|
|
// Support obtaining ResourceReader instances.
|
|
def get_resource_loader(&self, fullname: &PyString) -> PyResult<PyObject> {
|
|
let key = fullname.to_string(py)?;
|
|
|
|
// This should not happen since code below should not be recursive into this
|
|
// function.
|
|
let mut resource_readers = match self.resource_readers(py).try_borrow_mut() {
|
|
Ok(v) => v,
|
|
Err(_) => {
|
|
return Err(PyErr::new::<RuntimeError, _>(py, "resource reader already borrowed"));
|
|
}
|
|
};
|
|
|
|
// Return an existing instance if we have one.
|
|
if let Some(reader) = resource_readers.get(&*key) {
|
|
return Ok(reader.clone_ref(py));
|
|
}
|
|
|
|
// Only create a reader if the name is a package.
|
|
if self.packages(py).contains(&*key) {
|
|
|
|
// Not all packages have known resources.
|
|
let resources = match self.resources(py).get(&*key) {
|
|
Some(v) => v.clone(),
|
|
None => {
|
|
let h: Box<HashMap<&'static str, &'static [u8]>> = Box::new(HashMap::new());
|
|
Arc::new(h)
|
|
}
|
|
};
|
|
|
|
let reader = PyOxidizerResourceReader::create_instance(py, resources)?.into_object();
|
|
resource_readers.insert(key.to_string(), reader.clone_ref(py));
|
|
|
|
Ok(reader)
|
|
} else {
|
|
Ok(py.None())
|
|
}
|
|
}
|
|
});
|
|
|
|
#[allow(unused_doc_comments)]
|
|
/// Implements in-memory reading of resource data.
|
|
///
|
|
/// Implements importlib.abc.ResourceReader.
|
|
py_class!(class PyOxidizerResourceReader |py| {
|
|
data resources: Arc<Box<HashMap<&'static str, &'static [u8]>>>;
|
|
|
|
/// Returns an opened, file-like object for binary reading of the resource.
|
|
///
|
|
/// If the resource cannot be found, FileNotFoundError is raised.
|
|
def open_resource(&self, resource: &PyString) -> PyResult<PyObject> {
|
|
let key = resource.to_string(py)?;
|
|
|
|
if let Some(data) = self.resources(py).get(&*key) {
|
|
match get_memory_view(py, data) {
|
|
Some(mv) => {
|
|
let io_module = py.import("io")?;
|
|
let bytes_io = io_module.get(py, "BytesIO")?;
|
|
|
|
bytes_io.call(py, (mv,), None)
|
|
}
|
|
None => Err(PyErr::fetch(py))
|
|
}
|
|
} else {
|
|
Err(PyErr::new::<FileNotFoundError, _>(py, "resource not found"))
|
|
}
|
|
}
|
|
|
|
/// Returns the file system path to the resource.
|
|
///
|
|
/// If the resource does not concretely exist on the file system, raise
|
|
/// FileNotFoundError.
|
|
def resource_path(&self, _resource: &PyString) -> PyResult<PyObject> {
|
|
Err(PyErr::new::<FileNotFoundError, _>(py, "in-memory resources do not have filesystem paths"))
|
|
}
|
|
|
|
/// Returns True if the named name is considered a resource. FileNotFoundError
|
|
/// is raised if name does not exist.
|
|
def is_resource(&self, name: &PyString) -> PyResult<PyObject> {
|
|
let key = name.to_string(py)?;
|
|
|
|
if self.resources(py).contains_key(&*key) {
|
|
Ok(py.True().as_object().clone_ref(py))
|
|
} else {
|
|
Err(PyErr::new::<FileNotFoundError, _>(py, "resource not found"))
|
|
}
|
|
}
|
|
|
|
/// Returns an iterable of strings over the contents of the package.
|
|
///
|
|
/// Do note that it is not required that all names returned by the iterator be actual resources,
|
|
/// e.g. it is acceptable to return names for which is_resource() would be false.
|
|
///
|
|
/// Allowing non-resource names to be returned is to allow for situations where how a package
|
|
/// and its resources are stored are known a priori and the non-resource names would be useful.
|
|
/// For instance, returning subdirectory names is allowed so that when it is known that the
|
|
/// package and resources are stored on the file system then those subdirectory names can be
|
|
/// used directly.
|
|
def contents(&self) -> PyResult<PyObject> {
|
|
let resources = self.resources(py);
|
|
let mut names = Vec::with_capacity(resources.len());
|
|
|
|
for name in resources.keys() {
|
|
names.push(name.to_py_object(py));
|
|
}
|
|
|
|
let names_list = names.to_py_object(py);
|
|
|
|
Ok(names_list.as_object().clone_ref(py))
|
|
}
|
|
});
|
|
|
|
/// Record every dotted prefix of `name` in `packages`.
///
/// For `"a.b.c"` this inserts `"a.b"` and `"a"`; a name without a dot
/// inserts nothing. `name` itself is never inserted.
fn populate_packages(packages: &mut HashSet<&'static str>, name: &'static str) {
    // Each '.' marks the end of one ancestor name.
    packages.extend(name.match_indices('.').map(|(dot, _)| &name[..dot]));
}
|
|
|
|
/// Docstring bytes for the importer module. The literal carries its own
/// trailing NUL; `PyInit__pyoxidizer_importer` stores a raw pointer to it in
/// `MODULE_DEF.m_doc`.
const DOC: &[u8] = b"Binary representation of Python modules\0";
|
|
|
|
/// Configuration handed to the importer module when the interpreter is
/// initialized.
///
/// `module_init` reads these values through `NEXT_MODULE_STATE` and copies
/// them into the module's own state.
#[derive(Debug)]
pub struct InitModuleState {
    /// When true, the filesystem importer is registered on sys.meta_path.
    pub register_filesystem_importer: bool,

    /// Entries that sys.path is populated with.
    pub sys_paths: Vec<String>,

    /// Blob holding the embedded Python modules.
    pub py_modules_data: &'static [u8],

    /// Blob holding the embedded Python resources.
    pub py_resources_data: &'static [u8],
}
|
|
|
|
/// Holds reference to next module state struct.
|
|
///
|
|
/// This module state will be copied into the module's state when the
|
|
/// Python module is initialized.
|
|
pub static mut NEXT_MODULE_STATE: *const InitModuleState = std::ptr::null();
|
|
|
|
/// Represents which importer to use for known modules.
|
|
#[derive(Debug)]
|
|
enum KnownModuleFlavor {
|
|
Builtin,
|
|
Frozen,
|
|
InMemory { module_data: PythonModuleData },
|
|
}
|
|
|
|
type KnownModules = HashMap<&'static str, KnownModuleFlavor>;
|
|
|
|
/// Per-instance state of the importer module.
///
/// Keeping this on the module object instead of in globals means several
/// importer modules can coexist without stepping on each other.
#[derive(Debug)]
struct ModuleState {
    /// When true, PathFinder is registered on sys.meta_path.
    register_filesystem_importer: bool,

    /// Entries that sys.path is populated with.
    sys_paths: Vec<String>,

    /// Blob holding the embedded Python modules.
    py_modules_data: &'static [u8],

    /// Blob holding the embedded Python resources.
    py_resources_data: &'static [u8],

    /// Set once setup() has run; a second call is rejected.
    setup_called: bool,
}
|
|
|
|
/// Obtain the module state for an instance of our importer module.
|
|
///
|
|
/// Creates a Python exception on failure.
|
|
///
|
|
/// Doesn't do type checking that the PyModule is of the appropriate type.
|
|
fn get_module_state<'a>(py: Python, m: &'a PyModule) -> Result<&'a mut ModuleState, PyErr> {
|
|
let ptr = m.as_object().as_ptr();
|
|
let state = unsafe { pyffi::PyModule_GetState(ptr) as *mut ModuleState };
|
|
|
|
if state.is_null() {
|
|
let err = PyErr::new::<ValueError, _>(py, "unable to retrieve module state");
|
|
return Err(err);
|
|
}
|
|
|
|
Ok(unsafe { &mut *state })
|
|
}
|
|
|
|
/// Initialize the Python module object.
|
|
///
|
|
/// This is called as part of the PyInit_* function to create the internal
|
|
/// module object for the interpreter.
|
|
///
|
|
/// This receives a handle to the current Python interpreter and just-created
|
|
/// Python module instance. It populates the internal module state and registers
|
|
/// a _setup() on the module object for usage by Python.
|
|
///
|
|
/// Because this function accesses NEXT_MODULE_STATE, it should only be
|
|
/// called during interpreter initialization.
|
|
fn module_init(py: Python, m: &PyModule) -> PyResult<()> {
|
|
let mut state = get_module_state(py, m)?;
|
|
|
|
unsafe {
|
|
state.register_filesystem_importer = (*NEXT_MODULE_STATE).register_filesystem_importer;
|
|
// TODO we could move the value if we wanted to avoid the clone().
|
|
state.sys_paths = (*NEXT_MODULE_STATE).sys_paths.clone();
|
|
state.py_modules_data = (*NEXT_MODULE_STATE).py_modules_data;
|
|
state.py_resources_data = (*NEXT_MODULE_STATE).py_resources_data;
|
|
}
|
|
|
|
state.setup_called = false;
|
|
|
|
m.add(
|
|
py,
|
|
"_setup",
|
|
py_fn!(
|
|
py,
|
|
module_setup(
|
|
m: PyModule,
|
|
bootstrap_module: PyModule,
|
|
marshal_module: PyModule,
|
|
decode_source: PyObject
|
|
)
|
|
),
|
|
)?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Called after module import/initialization to configure the importing mechanism.
|
|
///
|
|
/// This does the heavy work of configuring the importing mechanism.
|
|
///
|
|
/// This function should only be called once as part of
|
|
/// _frozen_importlib_external._install_external_importers().
|
|
fn module_setup(
|
|
py: Python,
|
|
m: PyModule,
|
|
bootstrap_module: PyModule,
|
|
marshal_module: PyModule,
|
|
decode_source: PyObject,
|
|
) -> PyResult<PyObject> {
|
|
let state = get_module_state(py, &m)?;
|
|
|
|
if state.setup_called {
|
|
return Err(PyErr::new::<RuntimeError, _>(
|
|
py,
|
|
"PyOxidizer _setup() already called",
|
|
));
|
|
}
|
|
|
|
state.setup_called = true;
|
|
|
|
let imp_module = bootstrap_module.get(py, "_imp")?;
|
|
let imp_module = imp_module.cast_into::<PyModule>(py)?;
|
|
let sys_module = bootstrap_module.get(py, "sys")?;
|
|
let sys_module = sys_module.cast_as::<PyModule>(py)?;
|
|
let meta_path_object = sys_module.get(py, "meta_path")?;
|
|
|
|
// We should be executing as part of
|
|
// _frozen_importlib_external._install_external_importers().
|
|
// _frozen_importlib._install() should have already been called and set up
|
|
// sys.meta_path with [BuiltinImporter, FrozenImporter]. Those should be the
|
|
// only meta path importers present.
|
|
|
|
let meta_path = meta_path_object.cast_as::<PyList>(py)?;
|
|
|
|
if meta_path.len(py) != 2 {
|
|
return Err(PyErr::new::<ValueError, _>(
|
|
py,
|
|
"sys.meta_path does not contain 2 values",
|
|
));
|
|
}
|
|
|
|
let builtin_importer = meta_path.get_item(py, 0);
|
|
let frozen_importer = meta_path.get_item(py, 1);
|
|
|
|
// It may seem inefficient to create a full HashMap of the parsed data instead of e.g.
|
|
// streaming it. But the overhead of iterators was measured to be more than building
|
|
// up a temporary HashMap.
|
|
let modules_data = match PythonModulesData::from(state.py_modules_data) {
|
|
Ok(v) => v,
|
|
Err(msg) => return Err(PyErr::new::<ValueError, _>(py, msg)),
|
|
};
|
|
|
|
// Populate our known module lookup table with entries from builtins, frozens, and
|
|
// finally us. Last write wins and has the same effect as registering our
|
|
// meta path importer first. This should be safe. If nothing else, it allows
|
|
// some builtins to be overwritten by .py implemented modules.
|
|
let mut known_modules = KnownModules::with_capacity(modules_data.data.len() + 10);
|
|
|
|
for i in 0.. {
|
|
let record = unsafe { pyffi::PyImport_Inittab.offset(i) };
|
|
|
|
if unsafe { *record }.name.is_null() {
|
|
break;
|
|
}
|
|
|
|
let name = unsafe { CStr::from_ptr((*record).name as _) };
|
|
let name_str = match name.to_str() {
|
|
Ok(v) => v,
|
|
Err(_) => {
|
|
return Err(PyErr::new::<ValueError, _>(
|
|
py,
|
|
"unable to parse PyImport_Inittab",
|
|
));
|
|
}
|
|
};
|
|
|
|
known_modules.insert(name_str, KnownModuleFlavor::Builtin);
|
|
}
|
|
|
|
for i in 0.. {
|
|
let record = unsafe { pyffi::PyImport_FrozenModules.offset(i) };
|
|
|
|
if unsafe { *record }.name.is_null() {
|
|
break;
|
|
}
|
|
|
|
let name = unsafe { CStr::from_ptr((*record).name as _) };
|
|
let name_str = match name.to_str() {
|
|
Ok(v) => v,
|
|
Err(_) => {
|
|
return Err(PyErr::new::<ValueError, _>(
|
|
py,
|
|
"unable to parse PyImport_FrozenModules",
|
|
));
|
|
}
|
|
};
|
|
|
|
known_modules.insert(name_str, KnownModuleFlavor::Frozen);
|
|
}
|
|
|
|
// TODO consider baking set of packages into embedded data.
|
|
let mut packages: HashSet<&'static str> = HashSet::with_capacity(modules_data.data.len());
|
|
|
|
for (name, record) in modules_data.data {
|
|
known_modules.insert(
|
|
name,
|
|
KnownModuleFlavor::InMemory {
|
|
module_data: record,
|
|
},
|
|
);
|
|
populate_packages(&mut packages, name);
|
|
}
|
|
|
|
let resources_data = match PythonResourcesData::from(state.py_resources_data) {
|
|
Ok(v) => v,
|
|
Err(msg) => return Err(PyErr::new::<ValueError, _>(py, msg)),
|
|
};
|
|
|
|
let marshal_loads = marshal_module.get(py, "loads")?;
|
|
let call_with_frames_removed = bootstrap_module.get(py, "_call_with_frames_removed")?;
|
|
let module_spec_type = bootstrap_module.get(py, "ModuleSpec")?;
|
|
|
|
let builtins_module =
|
|
match unsafe { PyObject::from_borrowed_ptr_opt(py, pyffi::PyEval_GetBuiltins()) } {
|
|
Some(o) => o.cast_into::<PyDict>(py),
|
|
None => {
|
|
return Err(PyErr::new::<ValueError, _>(
|
|
py,
|
|
"unable to obtain __builtins__",
|
|
));
|
|
}
|
|
}?;
|
|
|
|
let exec_fn = match builtins_module.get_item(py, "exec") {
|
|
Some(v) => v,
|
|
None => {
|
|
return Err(PyErr::new::<ValueError, _>(
|
|
py,
|
|
"could not obtain __builtins__.exec",
|
|
));
|
|
}
|
|
};
|
|
|
|
let resource_readers: RefCell<Box<HashMap<String, PyObject>>> =
|
|
RefCell::new(Box::new(HashMap::new()));
|
|
|
|
let unified_importer = PyOxidizerFinder::create_instance(
|
|
py,
|
|
imp_module,
|
|
marshal_loads,
|
|
builtin_importer,
|
|
frozen_importer,
|
|
call_with_frames_removed,
|
|
module_spec_type,
|
|
decode_source,
|
|
exec_fn,
|
|
packages,
|
|
known_modules,
|
|
resources_data.packages,
|
|
resource_readers,
|
|
)?;
|
|
meta_path_object.call_method(py, "clear", NoArgs, None)?;
|
|
meta_path_object.call_method(py, "append", (unified_importer,), None)?;
|
|
|
|
// At this point the importing mechanism is fully initialized to use our
|
|
// unified importer, which handles built-in, frozen, and in-memory imports.
|
|
|
|
// Because we're probably running during Py_Initialize() and stdlib modules
|
|
// may not be in-memory, we need to register and configure additional importers
|
|
// here, before continuing with Py_Initialize(), otherwise we may not find
|
|
// the standard library!
|
|
|
|
if state.register_filesystem_importer {
|
|
// This is what importlib._bootstrap_external usually does:
|
|
// supported_loaders = _get_supported_file_loaders()
|
|
// sys.path_hooks.extend([FileFinder.path_hook(*supported_loaders)])
|
|
// sys.meta_path.append(PathFinder)
|
|
let frozen_importlib_external = py.import("_frozen_importlib_external")?;
|
|
|
|
let loaders =
|
|
frozen_importlib_external.call(py, "_get_supported_file_loaders", NoArgs, None)?;
|
|
let loaders_list = loaders.cast_as::<PyList>(py)?;
|
|
let loaders_vec: Vec<PyObject> = loaders_list.iter(py).collect();
|
|
let loaders_tuple = PyTuple::new(py, loaders_vec.as_slice());
|
|
|
|
let file_finder = frozen_importlib_external.get(py, "FileFinder")?;
|
|
let path_hook = file_finder.call_method(py, "path_hook", loaders_tuple, None)?;
|
|
let path_hooks = sys_module.get(py, "path_hooks")?;
|
|
path_hooks.call_method(py, "append", (path_hook,), None)?;
|
|
|
|
let path_finder = frozen_importlib_external.get(py, "PathFinder")?;
|
|
let meta_path = sys_module.get(py, "meta_path")?;
|
|
meta_path.call_method(py, "append", (path_finder,), None)?;
|
|
}
|
|
|
|
// Ideally we should be calling Py_SetPath() before Py_Initialize() to set sys.path.
|
|
// But we tried to do this and only ran into problems due to string conversions,
|
|
// unwanted side-effects. Updating sys.path directly before it is used by PathFinder
|
|
// (which was just registered above) should have the same effect.
|
|
|
|
// Always clear out sys.path.
|
|
let sys_path = sys_module.get(py, "path")?;
|
|
sys_path.call_method(py, "clear", NoArgs, None)?;
|
|
|
|
// And repopulate it with entries from the config.
|
|
for path in &state.sys_paths {
|
|
let py_path = PyString::new(py, path.as_str());
|
|
|
|
sys_path.call_method(py, "append", (py_path,), None)?;
|
|
}
|
|
|
|
Ok(py.None())
|
|
}
|
|
|
|
static mut MODULE_DEF: pyffi::PyModuleDef = pyffi::PyModuleDef {
|
|
m_base: pyffi::PyModuleDef_HEAD_INIT,
|
|
m_name: std::ptr::null(),
|
|
m_doc: std::ptr::null(),
|
|
m_size: std::mem::size_of::<ModuleState>() as isize,
|
|
m_methods: 0 as *mut _,
|
|
m_slots: 0 as *mut _,
|
|
m_traverse: None,
|
|
m_clear: None,
|
|
m_free: None,
|
|
};
|
|
|
|
/// Module initialization function.
|
|
///
|
|
/// This creates the Python module object.
|
|
///
|
|
/// We don't use the macros in the cpython crate because they are somewhat
|
|
/// opinionated about how things should work. e.g. they call
|
|
/// PyEval_InitThreads(), which is undesired. We want total control.
|
|
#[allow(non_snake_case)]
|
|
pub extern "C" fn PyInit__pyoxidizer_importer() -> *mut pyffi::PyObject {
|
|
let py = unsafe { cpython::Python::assume_gil_acquired() };
|
|
|
|
// TRACKING RUST1.32 We can't call as_ptr() in const fn in Rust 1.31.
|
|
unsafe {
|
|
if MODULE_DEF.m_name.is_null() {
|
|
MODULE_DEF.m_name = PYOXIDIZER_IMPORTER_NAME.as_ptr() as *const _;
|
|
MODULE_DEF.m_doc = DOC.as_ptr() as *const _;
|
|
}
|
|
}
|
|
|
|
let module = unsafe { pyffi::PyModule_Create(&mut MODULE_DEF) };
|
|
|
|
if module.is_null() {
|
|
return module;
|
|
}
|
|
|
|
let module = match unsafe { PyObject::from_owned_ptr(py, module).cast_into::<PyModule>(py) } {
|
|
Ok(m) => m,
|
|
Err(e) => {
|
|
PyErr::from(e).restore(py);
|
|
return std::ptr::null_mut();
|
|
}
|
|
};
|
|
|
|
match module_init(py, &module) {
|
|
Ok(()) => module.into_object().steal_ptr(),
|
|
Err(e) => {
|
|
e.restore(py);
|
|
std::ptr::null_mut()
|
|
}
|
|
}
|
|
}
|