Allow packaging with pyOxidizer

This commit is contained in:
Gabriel Augendre 2019-07-11 22:24:36 +02:00
parent 5624cc870d
commit 34ed538f9d
16 changed files with 2759 additions and 1 deletions

8
.gitignore vendored
View file

@ -212,3 +212,11 @@ Temporary Items
.apdisk .apdisk
# End of https://www.gitignore.io/api/macos # End of https://www.gitignore.io/api/macos
#Added by cargo
#
#already existing elements are commented out
/target
**/*.rs.bk

351
Cargo.lock generated Normal file
View file

@ -0,0 +1,351 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "aho-corasick"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "autocfg"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bitflags"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cc"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cfg-if"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cloudabi"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "cpython"
version = "0.2.1"
source = "git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0#2093cc5ab9b29d7db2255a0df836d89e440754b6"
dependencies = [
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
"python3-sys 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)",
]
[[package]]
name = "fs_extra"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "insee_translator"
version = "0.1.0"
dependencies = [
"jemallocator-global 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"pyembed 0.2.0",
]
[[package]]
name = "jemalloc-sys"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)",
"fs_extra 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "jemallocator"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"jemalloc-sys 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "jemallocator-global"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"jemallocator 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lazy_static"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "libc"
version = "0.2.59"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "memchr"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "num-traits"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "pyembed"
version = "0.2.0"
dependencies = [
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"cpython 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)",
"jemalloc-sys 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
"python3-sys 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)",
"uuid 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "python3-sys"
version = "0.2.1"
source = "git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0#2093cc5ab9b29d7db2255a0df836d89e440754b6"
dependencies = [
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_jitter 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_chacha"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_core"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_core"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "rand_hc"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_isaac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_jitter"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_os"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_pcg"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_xorshift"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rdrand"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex"
version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "thread_local"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ucd-util"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "utf8-ranges"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "uuid"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "winapi"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum aho-corasick 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)" = "36b7aa1ccb7d7ea3f437cf025a2ab1c47cc6c1bc9fc84918ff449def12f5e282"
"checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf"
"checksum bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd"
"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
"checksum cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)" = "39f75544d7bbaf57560d2168f28fd649ff9c76153874db88bdbdfd839b1a7e7d"
"checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum cpython 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)" = "<none>"
"checksum fs_extra 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5f2a4a2034423744d2cc7ca2068453168dcdb82c438419e639a26bd87839c674"
"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
"checksum jemalloc-sys 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0d3b9f3f5c9b31aa0f5ed3260385ac205db665baa41d49bb8338008ae94ede45"
"checksum jemallocator 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "43ae63fcfc45e99ab3d1b29a46782ad679e98436c3169d15a167a1108a724b69"
"checksum jemallocator-global 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "991b61de8365c8b5707cf6cabbff98cfd6eaca9b851948b883efea408c7f581e"
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
"checksum libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)" = "3262021842bf00fe07dbd6cf34ff25c99d7a7ebef8deea84db72be3ea3bb0aff"
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
"checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32"
"checksum python3-sys 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)" = "<none>"
"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
"checksum rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef"
"checksum rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
"checksum rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0e7a549d590831370895ab7ba4ea0c1b6b011d106b5ff2da6eee112615e6dc0"
"checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4"
"checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08"
"checksum rand_jitter 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b"
"checksum rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071"
"checksum rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44"
"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
"checksum regex 1.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "d9d8297cc20bbb6184f8b45ff61c8ee6a9ac56c156cec8e38c3e5084773c44ad"
"checksum regex-syntax 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "9b01330cce219c1c6b2e209e5ed64ccd587ae5c67bed91c0b49eecf02ae40e21"
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
"checksum utf8-ranges 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "9d50aa7650df78abf942826607c62468ce18d9019673d4a2ebe1865dbb96ffde"
"checksum uuid 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)" = "90dbc611eb48397705a6b0f6e917da23ae517e4d127123d2cf7674206627d32a"
"checksum winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f10e386af2b13e47c89e7236a7a14a086791a2b88ebad6df9bf42040195cf770"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

15
Cargo.toml Normal file
View file

@ -0,0 +1,15 @@
[package]
name = "insee_translator"
version = "0.1.0"
authors = ["Gabriel Augendre <gabriel@augendre.info>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
jemallocator-global = { version = "0.3", optional = true }
pyembed = { path = "pyembed" }
[features]
default = []
jemalloc = ["jemallocator-global", "pyembed/jemalloc"]

0
main/__init__.py Normal file
View file

View file

@ -158,4 +158,5 @@ def main():
pprint.pprint(data.to_dict()) pprint.pprint(data.to_dict())
if __name__ == '__main__':
main() main()

25
pyembed/Cargo.toml Normal file
View file

@ -0,0 +1,25 @@
[package]
name = "pyembed"
version = "0.2.0"
authors = ["Gregory Szorc <gregory.szorc@gmail.com>"]
edition = "2018"
build = "build.rs"
[dependencies]
byteorder = "1"
jemalloc-sys = { version = "0.3", optional = true }
libc = "0.2"
uuid = { version = "0.7", features = ["v4"] }
[dependencies.python3-sys]
git = "https://github.com/indygreg/PyOxidizer.git"
tag = "v0.2.0"
[dependencies.cpython]
git = "https://github.com/indygreg/PyOxidizer.git"
tag = "v0.2.0"
features = ["link-mode-unresolved-static", "python3-sys", "no-auto-initialize"]
[features]
default = []
jemalloc = ["jemalloc-sys"]

65
pyembed/build.rs Normal file
View file

@ -0,0 +1,65 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
use std::env;
use std::path::PathBuf;
use std::process;
/// Location of the `pyoxidizer` binary that generated this build script.
///
/// Only consulted when the `PYOXIDIZER_EXE` environment variable is not set.
const DEFAULT_PYOXIDIZER_EXE: &str = r#"/Users/gaugendre/.cargo/bin/pyoxidizer"#;

/// Re-emits the cargo metadata recorded by an earlier build.
///
/// The artifact directory is taken from `PYOXIDIZER_ARTIFACT_DIR`, falling
/// back to cargo's `OUT_DIR`. The contents of `cargo_metadata.txt` in that
/// directory are printed verbatim so cargo registers the pre-built libraries
/// with this invocation.
///
/// # Panics
///
/// Panics if neither directory variable is set or the metadata file cannot
/// be read.
fn emit_prebuilt_artifact_metadata() {
    let artifact_dir_path = env::var("PYOXIDIZER_ARTIFACT_DIR")
        .map(PathBuf::from)
        .unwrap_or_else(|_| PathBuf::from(env::var("OUT_DIR").unwrap()));

    println!(
        "using pre-built artifacts from {}",
        artifact_dir_path.display()
    );

    println!("cargo:rerun-if-env-changed=PYOXIDIZER_REUSE_ARTIFACTS");
    println!("cargo:rerun-if-env-changed=PYOXIDIZER_ARTIFACT_DIR");

    // Emit the cargo metadata lines to register libraries for linking.
    let cargo_metadata_path = artifact_dir_path.join("cargo_metadata.txt");
    let metadata = std::fs::read_to_string(&cargo_metadata_path).unwrap_or_else(|err| {
        // Same text `expect` would produce: "<message>: <error Debug>".
        panic!(
            "failed to read {}: {:?}",
            cargo_metadata_path.display(),
            err
        )
    });
    println!("{}", metadata);
}

/// Delegates the build to `pyoxidizer run-build-script build.rs`.
///
/// The executable comes from `PYOXIDIZER_EXE` when set, otherwise from
/// [`DEFAULT_PYOXIDIZER_EXE`].
///
/// # Panics
///
/// Panics if the executable does not exist, cannot be spawned, or exits
/// unsuccessfully.
fn run_pyoxidizer_build_script() {
    let pyoxidizer_exe =
        env::var("PYOXIDIZER_EXE").unwrap_or_else(|_| DEFAULT_PYOXIDIZER_EXE.to_string());

    if !PathBuf::from(&pyoxidizer_exe).exists() {
        panic!("pyoxidizer executable does not exist: {}", &pyoxidizer_exe);
    }

    let status = process::Command::new(&pyoxidizer_exe)
        .arg("run-build-script")
        .arg("build.rs")
        .status()
        .unwrap_or_else(|e| panic!("`pyoxidizer run-build-script` failed: {}", e));

    if !status.success() {
        panic!("`pyoxidizer run-build-script` failed");
    }
}

/// Build-script entry point.
///
/// When `PYOXIDIZER_REUSE_ARTIFACTS` is set (to any value), pre-built
/// artifacts are reused and only their recorded cargo metadata is emitted.
/// Otherwise the `pyoxidizer` executable is invoked to do the work.
fn main() {
    if env::var("PYOXIDIZER_REUSE_ARTIFACTS").is_ok() {
        emit_prebuilt_artifact_metadata();
    } else {
        run_pyoxidizer_build_script();
    }
}

118
pyembed/src/config.rs Normal file
View file

@ -0,0 +1,118 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
//! Data structures for configuring a Python interpreter.
/// Selects the memory allocator used for Python's raw allocation domain.
#[derive(Clone, Debug)]
pub enum PythonRawAllocator {
    /// Allocate through jemalloc.
    Jemalloc,

    /// Allocate through the Rust global allocator.
    Rust,

    /// Allocate through the system allocator.
    System,
}
/// Describes what Python code the interpreter should run.
#[derive(Clone, Debug)]
pub enum PythonRunMode {
    /// Run nothing.
    None,

    /// Start a Python REPL.
    Repl,

    /// Run the named Python module as the main module.
    Module { module: String },

    /// Evaluate the given string of Python code.
    Eval { code: String },
}
/// Complete configuration for an embedded Python interpreter.
///
/// A value of this type carries all the state that defines how an
/// interpreter behaves at run-time, and is what interpreters are
/// constructed from.
#[derive(Clone, Debug)]
pub struct PythonConfig {
    /// Program name reported to Python.
    pub program_name: String,

    /// Encoding name for the stdio handles.
    pub standard_io_encoding: Option<String>,

    /// Encoding error mode name for the stdio handles.
    pub standard_io_errors: Option<String>,

    /// Python optimization level.
    pub opt_level: i32,

    /// Whether our custom frozen importlib bootstrap modules are loaded.
    pub use_custom_importlib: bool,

    /// Whether the filesystem-based `sys.meta_path` finder is loaded.
    pub filesystem_importer: bool,

    /// Filesystem paths appended to `sys.path`.
    ///
    /// ``$ORIGIN`` resolves to the application's directory at run-time.
    pub sys_paths: Vec<String>,

    /// Whether the `site.py` module is loaded during initialization.
    pub import_site: bool,

    /// Whether a user-specific site module is loaded during initialization.
    pub import_user_site: bool,

    /// Whether the various `PYTHON*` environment variables are ignored.
    pub ignore_python_env: bool,

    /// Whether writing ``.pyc`` files is suppressed when ``.py`` files are
    /// imported from the filesystem. Typically irrelevant, since modules are
    /// imported from memory.
    pub dont_write_bytecode: bool,

    /// Whether the stdout and stderr streams are unbuffered.
    pub unbuffered_stdio: bool,

    /// Bytecode of the `importlib._bootstrap` / `_frozen_importlib` module.
    pub frozen_importlib_data: &'static [u8],

    /// Bytecode of the `importlib._bootstrap_external` /
    /// `_frozen_importlib_external` module.
    pub frozen_importlib_external_data: &'static [u8],

    /// Raw Python modules data.
    ///
    /// Produced as part of PyOxidizer packaging; likely the result of an
    /// `include_bytes!(...)` of a PyOxidizer-generated file.
    pub py_modules_data: &'static [u8],

    /// Raw Python resources data.
    ///
    /// Produced as part of PyOxidizer packaging; likely the result of an
    /// `include_bytes!(...)` of a PyOxidizer-generated file.
    pub py_resources_data: &'static [u8],

    /// Whether `sys.argvb` is populated with bytes versions of the process
    /// arguments.
    ///
    /// Bytes are UTF-16 on Windows; on POSIX they are the raw `char*` values
    /// passed to `int main()`.
    pub argvb: bool,

    /// Memory allocator used for the raw domain.
    pub raw_allocator: PythonRawAllocator,

    /// Name of the environment variable holding the directory in which a
    /// loaded-modules file is written.
    ///
    /// When this is set and the variable it names is set, a
    /// ``modules-<random>`` file is written to that directory on interpreter
    /// shutdown, containing a ``\n`` delimited list of the modules loaded in
    /// ``sys.modules``.
    pub write_modules_directory_env: Option<String>,

    /// Code to run by default.
    pub run: PythonRunMode,
}

5
pyembed/src/data.rs Normal file
View file

@ -0,0 +1,5 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
// Textually include the Rust source file whose path is stored in the
// `PYEMBED_DATA_RS_PATH` environment variable at compile time. `env!` makes
// the build fail if that variable is not set.
// NOTE(review): presumably the variable is exported by the build script's
// `pyoxidizer run-build-script` step — confirm.
include!(env!("PYEMBED_DATA_RS_PATH"));

892
pyembed/src/importer.rs Normal file
View file

@ -0,0 +1,892 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
/*!
Functionality for a Python importer.
This module defines a Python meta path importer and associated functionality
for importing Python modules from memory.
*/
use std::cell::RefCell;
use std::collections::{HashMap, HashSet};
use std::ffi::CStr;
use std::io::Cursor;
use std::sync::Arc;
use byteorder::{LittleEndian, ReadBytesExt};
use cpython::exc::{FileNotFoundError, ImportError, RuntimeError, ValueError};
use cpython::{
py_class, py_class_impl, py_coerce_item, py_fn, NoArgs, ObjectProtocol, PyClone, PyDict, PyErr,
PyList, PyModule, PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
};
use python3_sys as pyffi;
use python3_sys::{PyBUF_READ, PyMemoryView_FromMemory};
use super::pyinterp::PYOXIDIZER_IMPORTER_NAME;
/// Wrap a static byte slice in a Python `memoryview`.
///
/// The view references the slice's memory directly, so Python can read the
/// bytes without a copy being made. The result of the C API call is handed
/// to `PyObject::from_owned_ptr_opt`, whose `Option` is returned as-is.
#[inline]
fn get_memory_view(py: Python, data: &'static [u8]) -> Option<PyObject> {
    // SAFETY: `data` is `'static`, so the pointer/length pair handed to
    // Python stays valid for as long as the memoryview can exist, and the
    // view is requested with `PyBUF_READ`.
    // NOTE(review): relies on the caller holding the GIL, as witnessed by
    // the `py` token — confirm.
    unsafe {
        let view = PyMemoryView_FromMemory(data.as_ptr() as _, data.len() as _, PyBUF_READ);
        PyObject::from_owned_ptr_opt(py, view)
    }
}
/// References to one Python module's source and bytecode in memory.
///
/// Either part is `None` when the module carries no data of that kind.
#[derive(Debug)]
struct PythonModuleData {
    source: Option<&'static [u8]>,
    bytecode: Option<&'static [u8]>,
}

impl PythonModuleData {
    /// Build a memoryview object over the module's source bytes.
    ///
    /// Yields `None` when there is no source, or when `get_memory_view`
    /// itself yields `None`.
    fn get_source_memory_view(&self, py: Python) -> Option<PyObject> {
        self.source.and_then(|bytes| get_memory_view(py, bytes))
    }

    /// Build a memoryview object over the module's bytecode bytes.
    ///
    /// Yields `None` when there is no bytecode, or when `get_memory_view`
    /// itself yields `None`.
    fn get_bytecode_memory_view(&self, py: Python) -> Option<PyObject> {
        self.bytecode.and_then(|bytes| get_memory_view(py, bytes))
    }
}
/// Index of the Python modules stored in a raw backing blob.
///
/// Module names and payloads are borrowed straight from the blob.
struct PythonModulesData {
    data: HashMap<&'static str, PythonModuleData>,
}

impl PythonModulesData {
    /// Parse a packed modules blob into an index.
    ///
    /// Layout, as read here: a little-endian `u32` module count, then one
    /// `(name length, source length, bytecode length)` triple of
    /// little-endian `u32`s per module, then every name, every source and
    /// every bytecode, each group packed back to back in index order. A zero
    /// length means the module has no source (or no bytecode).
    ///
    /// # Errors
    ///
    /// Returns a static message when the count or a length cannot be read.
    ///
    /// # Panics
    ///
    /// Panics if a recorded length reaches past the end of `data`.
    fn from(data: &'static [u8]) -> Result<PythonModulesData, &'static str> {
        /// Slice `length` bytes starting at `start`; an empty payload is `None`.
        fn payload(data: &'static [u8], start: usize, length: usize) -> Option<&'static [u8]> {
            if length == 0 {
                None
            } else {
                Some(&data[start..start + length])
            }
        }

        let mut reader = Cursor::new(data);

        // Every header field is a little-endian u32; only the message differs.
        let mut read_field = |message: &'static str| -> Result<usize, &'static str> {
            reader
                .read_u32::<LittleEndian>()
                .map(|value| value as usize)
                .map_err(|_| message)
        };

        let count = read_field("failed reading count")?;

        let mut lengths = Vec::with_capacity(count);
        let mut names_size = 0;
        let mut sources_size = 0;

        for _ in 0..count {
            let name_length = read_field("failed reading name length")?;
            let source_length = read_field("failed reading source length")?;
            let bytecode_length = read_field("failed reading bytecode length")?;

            names_size += name_length;
            sources_size += source_length;
            lengths.push((name_length, source_length, bytecode_length));
        }

        // The three payload regions follow the header back to back.
        let mut name_offset = reader.position() as usize;
        let mut source_offset = name_offset + names_size;
        let mut bytecode_offset = source_offset + sources_size;

        let mut modules = HashMap::with_capacity(count);

        for (name_length, source_length, bytecode_length) in lengths {
            // NOTE(review): the name bytes are not validated here; this
            // assumes the blob was written with UTF-8 names — confirm.
            let name = unsafe {
                std::str::from_utf8_unchecked(&data[name_offset..name_offset + name_length])
            };
            let source = payload(data, source_offset, source_length);
            let bytecode = payload(data, bytecode_offset, bytecode_length);

            name_offset += name_length;
            source_offset += source_length;
            bytecode_offset += bytecode_length;

            modules.insert(name, PythonModuleData { source, bytecode });
        }

        Ok(PythonModulesData { data: modules })
    }
}
/// Represents Python resources data in memory.
///
/// This is essentially an index over a raw backing blob: package name to
/// (resource name to resource bytes), all borrowed from the blob.
struct PythonResourcesData {
    packages: HashMap<&'static str, Arc<Box<HashMap<&'static str, &'static [u8]>>>>,
}

impl PythonResourcesData {
    /// Decode the little-endian `u32` at `*offset` and advance past it.
    ///
    /// Returns `error` when fewer than four bytes remain.
    fn read_u32(
        data: &[u8],
        offset: &mut usize,
        error: &'static str,
    ) -> Result<usize, &'static str> {
        let end = offset.checked_add(4).ok_or(error)?;
        let bytes = data.get(*offset..end).ok_or(error)?;
        *offset = end;

        Ok(u32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as usize)
    }

    /// Borrow `length` bytes at `*offset` and advance past them.
    ///
    /// Returns `error` when the range does not fit inside `data`.
    fn take_bytes(
        data: &'static [u8],
        offset: &mut usize,
        length: usize,
        error: &'static str,
    ) -> Result<&'static [u8], &'static str> {
        let end = offset.checked_add(length).ok_or(error)?;
        let bytes = data.get(*offset..end).ok_or(error)?;
        *offset = end;

        Ok(bytes)
    }

    /// Construct a new instance from a memory slice.
    ///
    /// Layout, as read here (all integers are little-endian `u32`s):
    ///
    /// 1. the package count;
    /// 2. per package: its name length and resource count, followed by a
    ///    `(name length, data length)` pair per resource;
    /// 3. all names back to back, in index order (each package name followed
    ///    by the names of its resources);
    /// 4. all resource payloads back to back, in index order.
    ///
    /// # Errors
    ///
    /// Returns a static message when the header is truncated, when a recorded
    /// length reaches past the end of `data`, or when a name is not valid
    /// UTF-8.
    fn from(data: &'static [u8]) -> Result<PythonResourcesData, &'static str> {
        let mut header_offset = 0;

        let package_count =
            Self::read_u32(data, &mut header_offset, "failed reading package count")?;

        // Every index entry occupies 8 header bytes, so cap the reservation by
        // what the blob could possibly hold instead of trusting the count.
        let mut index = Vec::with_capacity(package_count.min(data.len() / 8));
        let mut total_names_length: usize = 0;

        for _ in 0..package_count {
            let package_name_length = Self::read_u32(
                data,
                &mut header_offset,
                "failed reading package name length",
            )?;
            let resource_count =
                Self::read_u32(data, &mut header_offset, "failed reading resource count")?;

            total_names_length = total_names_length.saturating_add(package_name_length);

            let mut package_index = Vec::with_capacity(resource_count.min(data.len() / 8));

            for _ in 0..resource_count {
                let resource_name_length = Self::read_u32(
                    data,
                    &mut header_offset,
                    "failed reading resource name length",
                )?;
                let resource_data_length = Self::read_u32(
                    data,
                    &mut header_offset,
                    "failed reading resource data length",
                )?;

                total_names_length = total_names_length.saturating_add(resource_name_length);
                package_index.push((resource_name_length, resource_data_length));
            }

            index.push((package_name_length, package_index));
        }

        // Names start right after the header; payloads start after all names.
        // A saturated offset simply makes the first payload read fail below.
        let mut name_offset = header_offset;
        let mut data_offset = name_offset.saturating_add(total_names_length);

        let mut res = HashMap::new();

        for (package_name_length, package_index) in index {
            let package_name = std::str::from_utf8(Self::take_bytes(
                data,
                &mut name_offset,
                package_name_length,
                "failed reading package name",
            )?)
            .map_err(|_| "package name is not valid UTF-8")?;

            let mut package_data = Box::new(HashMap::new());

            for (resource_name_length, resource_data_length) in package_index {
                let resource_name = std::str::from_utf8(Self::take_bytes(
                    data,
                    &mut name_offset,
                    resource_name_length,
                    "failed reading resource name",
                )?)
                .map_err(|_| "resource name is not valid UTF-8")?;

                // `take_bytes` advances `data_offset`, so each resource gets
                // its own payload rather than re-reading the first one.
                let resource_data = Self::take_bytes(
                    data,
                    &mut data_offset,
                    resource_data_length,
                    "failed reading resource data",
                )?;

                package_data.insert(resource_name, resource_data);
            }

            res.insert(package_name, Arc::new(package_data));
        }

        Ok(PythonResourcesData { packages: res })
    }
}
#[allow(unused_doc_comments)]
/// Python type to import modules.
///
/// This type implements the importlib.abc.MetaPathFinder interface for
/// finding/loading modules. It supports loading various flavors of modules,
/// allowing it to be the only registered sys.meta_path importer.
// Python class acting as both meta path finder and loader for built-in,
// frozen and in-memory modules. Every lookup is driven by the `known_modules`
// table, which maps a module name to the importer "flavor" that handles it.
py_class!(class PyOxidizerFinder |py| {
// The `_imp` module; used to fetch code objects for frozen modules.
data imp_module: PyModule;
// `marshal.loads`; turns in-memory bytecode into a code object.
data marshal_loads: PyObject;
// Importer that handles modules flagged as `KnownModuleFlavor::Builtin`.
data builtin_importer: PyObject;
// Importer that handles modules flagged as `KnownModuleFlavor::Frozen`.
data frozen_importer: PyObject;
// Bootstrap helper `_call_with_frames_removed`, used to run `exec_fn`.
data call_with_frames_removed: PyObject;
// The `ModuleSpec` type, instantiated by find_spec() for in-memory modules.
data module_spec_type: PyObject;
// Callable that converts a source memory view into the value get_source() returns.
data decode_source: PyObject;
// The `exec` builtin; executes a module's code object against its `__dict__`.
data exec_fn: PyObject;
// Names of modules that are treated as packages.
data packages: HashSet<&'static str>;
// Module name -> which importer flavor is responsible for it.
data known_modules: KnownModules;
// Package name -> (resource name -> raw resource bytes).
data resources: HashMap<&'static str, Arc<Box<HashMap<&'static str, &'static [u8]>>>>;
// Cache of resource reader instances handed out so far, keyed by package name.
data resource_readers: RefCell<Box<HashMap<String, PyObject>>>;
// Start of importlib.abc.MetaPathFinder interface.
// Returns a module spec for `fullname`, or None if the module is unknown to us.
def find_spec(&self, fullname: &PyString, path: &PyObject, target: Option<PyObject> = None) -> PyResult<PyObject> {
let key = fullname.to_string(py)?;
if let Some(flavor) = self.known_modules(py).get(&*key) {
match flavor {
KnownModuleFlavor::Builtin => {
// BuiltinImporter.find_spec() always returns None if `path` is defined.
// And it doesn't use `target`. So don't proxy these values.
self.builtin_importer(py).call_method(py, "find_spec", (fullname,), None)
}
KnownModuleFlavor::Frozen => {
self.frozen_importer(py).call_method(py, "find_spec", (fullname, path, target), None)
}
KnownModuleFlavor::InMemory { .. } => {
// We are the loader for in-memory modules: build ModuleSpec(fullname, self,
// is_package=...) ourselves.
let is_package = self.packages(py).contains(&*key);
// TODO consider setting origin and has_location so __file__ will be
// populated.
let kwargs = PyDict::new(py);
kwargs.set_item(py, "is_package", is_package)?;
self.module_spec_type(py).call(py, (fullname, self), Some(&kwargs))
}
}
} else {
Ok(py.None())
}
}
def find_module(&self, _fullname: &PyObject, _path: &PyObject) -> PyResult<PyObject> {
// Method is deprecated. Always returns None.
// We /could/ call find_spec(). Meh.
Ok(py.None())
}
// Nothing is cached by the finder itself, so this is a no-op returning None.
def invalidate_caches(&self) -> PyResult<PyObject> {
Ok(py.None())
}
// End of importlib.abc.MetaPathFinder interface.
// Start of importlib.abc.Loader interface.
// Always returns None; no custom module object is created here.
def create_module(&self, _spec: &PyObject) -> PyResult<PyObject> {
Ok(py.None())
}
// Executes `module`, dispatching on the flavor registered for its `__name__`.
// Raises ImportError if an in-memory module has no bytecode.
def exec_module(&self, module: &PyObject) -> PyResult<PyObject> {
let name = module.getattr(py, "__name__")?;
let key = name.extract::<String>(py)?;
if let Some(flavor) = self.known_modules(py).get(&*key) {
match flavor {
KnownModuleFlavor::Builtin => {
self.builtin_importer(py).call_method(py, "exec_module", (module,), None)
},
KnownModuleFlavor::Frozen => {
self.frozen_importer(py).call_method(py, "exec_module", (module,), None)
},
KnownModuleFlavor::InMemory { module_data } => {
match module_data.get_bytecode_memory_view(py) {
Some(value) => {
// Unmarshal the bytecode, then run exec(code, module.__dict__) through
// _call_with_frames_removed().
let code = self.marshal_loads(py).call(py, (value,), None)?;
let exec_fn = self.exec_fn(py);
let dict = module.getattr(py, "__dict__")?;
self.call_with_frames_removed(py).call(py, (exec_fn, code, dict), None)
},
None => {
Err(PyErr::new::<ImportError, _>(py, ("cannot find code in memory", name)))
}
}
},
}
} else {
// Raising here might make more sense, as exec_module() shouldn't
// be called on the Loader that didn't create the module.
Ok(py.None())
}
}
// End of importlib.abc.Loader interface.
// Start of importlib.abc.InspectLoader interface.
// Returns the code object for `fullname`. Built-in and unknown modules yield None;
// an in-memory module without bytecode raises ImportError.
def get_code(&self, fullname: &PyString) -> PyResult<PyObject> {
let key = fullname.to_string(py)?;
if let Some(flavor) = self.known_modules(py).get(&*key) {
match flavor {
KnownModuleFlavor::Frozen => {
let imp_module = self.imp_module(py);
imp_module.call(py, "get_frozen_object", (fullname,), None)
},
KnownModuleFlavor::InMemory { module_data } => {
match module_data.get_bytecode_memory_view(py) {
Some(value) => {
self.marshal_loads(py).call(py, (value,), None)
}
None => {
Err(PyErr::new::<ImportError, _>(py, ("cannot find code in memory", fullname)))
}
}
},
KnownModuleFlavor::Builtin => {
Ok(py.None())
}
}
} else {
Ok(py.None())
}
}
// Returns the decoded source for an in-memory module. Modules of any other flavor
// (or unknown modules) yield None; an in-memory module without source raises
// ImportError.
def get_source(&self, fullname: &PyString) -> PyResult<PyObject> {
let key = fullname.to_string(py)?;
if let Some(flavor) = self.known_modules(py).get(&*key) {
if let KnownModuleFlavor::InMemory { module_data } = flavor {
match module_data.get_source_memory_view(py) {
Some(value) => {
self.decode_source(py).call(py, (value,), None)
},
None => {
Err(PyErr::new::<ImportError, _>(py, ("source not available", fullname)))
}
}
} else {
Ok(py.None())
}
} else {
Ok(py.None())
}
}
// End of importlib.abc.InspectLoader interface.
// Support obtaining ResourceReader instances.
// Returns a (cached) PyOxidizerResourceReader for package `fullname`, or None when
// `fullname` is not a known package.
def get_resource_loader(&self, fullname: &PyString) -> PyResult<PyObject> {
let key = fullname.to_string(py)?;
// This should not happen since code below should not be recursive into this
// function.
let mut resource_readers = match self.resource_readers(py).try_borrow_mut() {
Ok(v) => v,
Err(_) => {
return Err(PyErr::new::<RuntimeError, _>(py, "resource reader already borrowed"));
}
};
// Return an existing instance if we have one.
if let Some(reader) = resource_readers.get(&*key) {
return Ok(reader.clone_ref(py));
}
// Only create a reader if the name is a package.
if self.packages(py).contains(&*key) {
// Not all packages have known resources.
let resources = match self.resources(py).get(&*key) {
Some(v) => v.clone(),
None => {
// Fall back to an empty resource map so a reader can still be created.
let h: Box<HashMap<&'static str, &'static [u8]>> = Box::new(HashMap::new());
Arc::new(h)
}
};
let reader = PyOxidizerResourceReader::create_instance(py, resources)?.into_object();
// Remember the reader so later calls for this package return the same object.
resource_readers.insert(key.to_string(), reader.clone_ref(py));
Ok(reader)
} else {
Ok(py.None())
}
}
});
#[allow(unused_doc_comments)]
/// Serves package resources straight from memory.
///
/// Implements importlib.abc.ResourceReader.
py_class!(class PyOxidizerResourceReader |py| {
    data resources: Arc<Box<HashMap<&'static str, &'static [u8]>>>;

    /// Opens the named resource as a binary file-like object (`io.BytesIO`).
    ///
    /// Raises FileNotFoundError when the resource is not known.
    def open_resource(&self, resource: &PyString) -> PyResult<PyObject> {
        let key = resource.to_string(py)?;

        let data = match self.resources(py).get(&*key) {
            Some(data) => data,
            None => return Err(PyErr::new::<FileNotFoundError, _>(py, "resource not found")),
        };

        // A missing memory view means the Python error indicator is set; surface it.
        let view = match get_memory_view(py, data) {
            Some(view) => view,
            None => return Err(PyErr::fetch(py)),
        };

        let io_module = py.import("io")?;
        let bytes_io = io_module.get(py, "BytesIO")?;
        bytes_io.call(py, (view,), None)
    }

    /// Returns the file system path of the resource.
    ///
    /// In-memory resources never exist on the file system, so this always raises
    /// FileNotFoundError.
    def resource_path(&self, _resource: &PyString) -> PyResult<PyObject> {
        Err(PyErr::new::<FileNotFoundError, _>(py, "in-memory resources do not have filesystem paths"))
    }

    /// Returns True if `name` is a resource; raises FileNotFoundError if `name`
    /// does not exist.
    def is_resource(&self, name: &PyString) -> PyResult<PyObject> {
        let key = name.to_string(py)?;

        if !self.resources(py).contains_key(&*key) {
            return Err(PyErr::new::<FileNotFoundError, _>(py, "resource not found"));
        }

        Ok(py.True().as_object().clone_ref(py))
    }

    /// Returns an iterable of strings naming the contents of the package.
    ///
    /// The ResourceReader contract does not require every returned name to be an
    /// actual resource (names for which is_resource() would be false are acceptable,
    /// e.g. subdirectory names for file system backed packages). This implementation
    /// returns exactly the keys of the in-memory resource map.
    def contents(&self) -> PyResult<PyObject> {
        let names: Vec<_> = self
            .resources(py)
            .keys()
            .map(|name| name.to_py_object(py))
            .collect();

        Ok(names.to_py_object(py).as_object().clone_ref(py))
    }
});
/// Records every ancestor package of the dotted module `name` in `packages`.
///
/// For `"a.b.c"` this inserts `"a.b"` and `"a"`. A name without a dot contributes
/// nothing.
fn populate_packages(packages: &mut HashSet<&'static str>, name: &'static str) {
    // Each '.' terminates one ancestor prefix of `name`.
    packages.extend(name.match_indices('.').map(|(dot, _)| &name[..dot]));
}
/// NUL-terminated docstring of the importer module; its pointer is stored in
/// `MODULE_DEF.m_doc` by `PyInit__pyoxidizer_importer()`.
const DOC: &[u8] = b"Binary representation of Python modules\0";
/// Represents global module state to be passed at interpreter initialization time.
///
/// An instance is published through `NEXT_MODULE_STATE` and its fields are copied
/// into the per-module `ModuleState` by `module_init()`.
#[derive(Debug)]
pub struct InitModuleState {
/// Whether to register the filesystem importer on sys.meta_path.
pub register_filesystem_importer: bool,
/// Values to set on sys.path.
pub sys_paths: Vec<String>,
/// Raw data describing Python modules; parsed with `PythonModulesData::from()`
/// during `_setup()`.
pub py_modules_data: &'static [u8],
/// Raw data describing Python resources; parsed with `PythonResourcesData::from()`
/// during `_setup()`.
pub py_resources_data: &'static [u8],
}
/// Holds reference to next module state struct.
///
/// This module state will be copied into the module's state when the
/// Python module is initialized.
///
/// Starts out null. `module_init()` dereferences it without a null check, so it
/// must point at a live `InitModuleState` before the importer module is created.
pub static mut NEXT_MODULE_STATE: *const InitModuleState = std::ptr::null();
/// Represents which importer to use for known modules.
#[derive(Debug)]
enum KnownModuleFlavor {
/// Module comes from the `PyImport_Inittab` table; delegated to the built-in importer.
Builtin,
/// Module comes from the `PyImport_FrozenModules` table; delegated to the frozen importer.
Frozen,
/// Module whose data is embedded in the binary and loaded by `PyOxidizerFinder` itself.
InMemory { module_data: PythonModuleData },
}
/// Lookup table from module name to the flavor of importer that handles it.
type KnownModules = HashMap<&'static str, KnownModuleFlavor>;
/// State associated with each importer module instance.
///
/// We write per-module state to per-module instances of this struct so
/// we don't rely on global variables and so multiple importer modules can
/// exist without issue.
///
/// The storage is owned by the Python module object (`MODULE_DEF.m_size` is the
/// size of this struct) and is reached through `get_module_state()`.
#[derive(Debug)]
struct ModuleState {
/// Whether to register PathFinder on sys.meta_path.
register_filesystem_importer: bool,
/// Values to set on sys.path.
sys_paths: Vec<String>,
/// Raw data describing Python modules (input to `PythonModulesData::from()`).
py_modules_data: &'static [u8],
/// Raw data describing Python resources (input to `PythonResourcesData::from()`).
py_resources_data: &'static [u8],
/// Whether setup() has been called.
setup_called: bool,
}
/// Fetch the `ModuleState` stored inside an instance of our importer module.
///
/// Returns a Python `ValueError` when the interpreter reports no state for `m`.
///
/// No check is made that `m` really is our importer module; the state pointer is
/// simply reinterpreted as `ModuleState`.
fn get_module_state<'a>(py: Python, m: &'a PyModule) -> Result<&'a mut ModuleState, PyErr> {
    let raw_state =
        unsafe { pyffi::PyModule_GetState(m.as_object().as_ptr()) } as *mut ModuleState;

    // `as_mut()` yields None for a null pointer and a mutable reference otherwise.
    match unsafe { raw_state.as_mut() } {
        Some(state) => Ok(state),
        None => Err(PyErr::new::<ValueError, _>(
            py,
            "unable to retrieve module state",
        )),
    }
}
/// Initialize the Python module object.
///
/// This is called as part of the PyInit_* function to create the internal
/// module object for the interpreter.
///
/// This receives a handle to the current Python interpreter and just-created
/// Python module instance. It populates the internal module state and registers
/// a _setup() on the module object for usage by Python.
///
/// Because this function accesses NEXT_MODULE_STATE, it should only be
/// called during interpreter initialization: NEXT_MODULE_STATE must point at a
/// live `InitModuleState` for the duration of the call.
///
/// Returns an error if the module state cannot be obtained or `_setup` cannot be
/// added to the module.
fn module_init(py: Python, m: &PyModule) -> PyResult<()> {
    let state = get_module_state(py, m)?;

    unsafe {
        let init = &*NEXT_MODULE_STATE;

        state.register_filesystem_importer = init.register_filesystem_importer;
        // The module state lives in memory allocated by the interpreter and has never
        // held a constructed `Vec`. A plain assignment would first drop the bytes that
        // are currently there as if they were a valid `Vec<String>`, so initialize the
        // field in place without reading or dropping the old contents.
        // TODO we could move the value if we wanted to avoid the clone().
        std::ptr::write(&mut state.sys_paths, init.sys_paths.clone());
        state.py_modules_data = init.py_modules_data;
        state.py_resources_data = init.py_resources_data;
    }

    state.setup_called = false;

    // Expose module_setup() to Python as `_setup(m, bootstrap_module, marshal_module,
    // decode_source)`.
    m.add(
        py,
        "_setup",
        py_fn!(
            py,
            module_setup(
                m: PyModule,
                bootstrap_module: PyModule,
                marshal_module: PyModule,
                decode_source: PyObject
            )
        ),
    )?;

    Ok(())
}
/// Called after module import/initialization to configure the importing mechanism.
///
/// This does the heavy work of configuring the importing mechanism.
///
/// This function should only be called once as part of
/// _frozen_importlib_external._install_external_importers().
///
/// In order, it: builds the known-module table (built-in, frozen, in-memory),
/// replaces the contents of sys.meta_path with a single `PyOxidizerFinder`,
/// optionally registers the filesystem importer, and rewrites sys.path from the
/// configured values.
///
/// Raises RuntimeError if called a second time for the same module, and ValueError
/// if sys.meta_path does not hold exactly two entries, if the embedded data cannot
/// be parsed, or if `__builtins__`/`exec` cannot be obtained.
fn module_setup(
py: Python,
m: PyModule,
bootstrap_module: PyModule,
marshal_module: PyModule,
decode_source: PyObject,
) -> PyResult<PyObject> {
let state = get_module_state(py, &m)?;
if state.setup_called {
return Err(PyErr::new::<RuntimeError, _>(
py,
"PyOxidizer _setup() already called",
));
}
// Flag is set before any of the work below, so a failed setup cannot be retried.
state.setup_called = true;
let imp_module = bootstrap_module.get(py, "_imp")?;
let imp_module = imp_module.cast_into::<PyModule>(py)?;
let sys_module = bootstrap_module.get(py, "sys")?;
let sys_module = sys_module.cast_as::<PyModule>(py)?;
let meta_path_object = sys_module.get(py, "meta_path")?;
// We should be executing as part of
// _frozen_importlib_external._install_external_importers().
// _frozen_importlib._install() should have already been called and set up
// sys.meta_path with [BuiltinImporter, FrozenImporter]. Those should be the
// only meta path importers present.
let meta_path = meta_path_object.cast_as::<PyList>(py)?;
if meta_path.len(py) != 2 {
return Err(PyErr::new::<ValueError, _>(
py,
"sys.meta_path does not contain 2 values",
));
}
let builtin_importer = meta_path.get_item(py, 0);
let frozen_importer = meta_path.get_item(py, 1);
// It may seem inefficient to create a full HashMap of the parsed data instead of e.g.
// streaming it. But the overhead of iterators was measured to be more than building
// up a temporary HashMap.
let modules_data = match PythonModulesData::from(state.py_modules_data) {
Ok(v) => v,
Err(msg) => return Err(PyErr::new::<ValueError, _>(py, msg)),
};
// Populate our known module lookup table with entries from builtins, frozens, and
// finally us. Last write wins and has the same effect as registering our
// meta path importer first. This should be safe. If nothing else, it allows
// some builtins to be overwritten by .py implemented modules.
let mut known_modules = KnownModules::with_capacity(modules_data.data.len() + 10);
// Walk the built-in module table until the record with a null name.
for i in 0.. {
let record = unsafe { pyffi::PyImport_Inittab.offset(i) };
if unsafe { *record }.name.is_null() {
break;
}
let name = unsafe { CStr::from_ptr((*record).name as _) };
let name_str = match name.to_str() {
Ok(v) => v,
Err(_) => {
return Err(PyErr::new::<ValueError, _>(
py,
"unable to parse PyImport_Inittab",
));
}
};
known_modules.insert(name_str, KnownModuleFlavor::Builtin);
}
// Same walk over the frozen module table.
for i in 0.. {
let record = unsafe { pyffi::PyImport_FrozenModules.offset(i) };
if unsafe { *record }.name.is_null() {
break;
}
let name = unsafe { CStr::from_ptr((*record).name as _) };
let name_str = match name.to_str() {
Ok(v) => v,
Err(_) => {
return Err(PyErr::new::<ValueError, _>(
py,
"unable to parse PyImport_FrozenModules",
));
}
};
known_modules.insert(name_str, KnownModuleFlavor::Frozen);
}
// TODO consider baking set of packages into embedded data.
let mut packages: HashSet<&'static str> = HashSet::with_capacity(modules_data.data.len());
// Register every embedded module and derive the set of packages from the dotted names.
for (name, record) in modules_data.data {
known_modules.insert(
name,
KnownModuleFlavor::InMemory {
module_data: record,
},
);
populate_packages(&mut packages, name);
}
let resources_data = match PythonResourcesData::from(state.py_resources_data) {
Ok(v) => v,
Err(msg) => return Err(PyErr::new::<ValueError, _>(py, msg)),
};
// Collect the Python callables the finder needs at import time.
let marshal_loads = marshal_module.get(py, "loads")?;
let call_with_frames_removed = bootstrap_module.get(py, "_call_with_frames_removed")?;
let module_spec_type = bootstrap_module.get(py, "ModuleSpec")?;
let builtins_module =
match unsafe { PyObject::from_borrowed_ptr_opt(py, pyffi::PyEval_GetBuiltins()) } {
Some(o) => o.cast_into::<PyDict>(py),
None => {
return Err(PyErr::new::<ValueError, _>(
py,
"unable to obtain __builtins__",
));
}
}?;
let exec_fn = match builtins_module.get_item(py, "exec") {
Some(v) => v,
None => {
return Err(PyErr::new::<ValueError, _>(
py,
"could not obtain __builtins__.exec",
));
}
};
let resource_readers: RefCell<Box<HashMap<String, PyObject>>> =
RefCell::new(Box::new(HashMap::new()));
// Argument order must match the `data` declarations of PyOxidizerFinder.
let unified_importer = PyOxidizerFinder::create_instance(
py,
imp_module,
marshal_loads,
builtin_importer,
frozen_importer,
call_with_frames_removed,
module_spec_type,
decode_source,
exec_fn,
packages,
known_modules,
resources_data.packages,
resource_readers,
)?;
// Replace [BuiltinImporter, FrozenImporter] with our single unified importer.
meta_path_object.call_method(py, "clear", NoArgs, None)?;
meta_path_object.call_method(py, "append", (unified_importer,), None)?;
// At this point the importing mechanism is fully initialized to use our
// unified importer, which handles built-in, frozen, and in-memory imports.
// Because we're probably running during Py_Initialize() and stdlib modules
// may not be in-memory, we need to register and configure additional importers
// here, before continuing with Py_Initialize(), otherwise we may not find
// the standard library!
if state.register_filesystem_importer {
// This is what importlib._bootstrap_external usually does:
// supported_loaders = _get_supported_file_loaders()
// sys.path_hooks.extend([FileFinder.path_hook(*supported_loaders)])
// sys.meta_path.append(PathFinder)
let frozen_importlib_external = py.import("_frozen_importlib_external")?;
let loaders =
frozen_importlib_external.call(py, "_get_supported_file_loaders", NoArgs, None)?;
let loaders_list = loaders.cast_as::<PyList>(py)?;
// The loaders are passed as positional arguments, hence the list -> tuple conversion.
let loaders_vec: Vec<PyObject> = loaders_list.iter(py).collect();
let loaders_tuple = PyTuple::new(py, loaders_vec.as_slice());
let file_finder = frozen_importlib_external.get(py, "FileFinder")?;
let path_hook = file_finder.call_method(py, "path_hook", loaders_tuple, None)?;
let path_hooks = sys_module.get(py, "path_hooks")?;
path_hooks.call_method(py, "append", (path_hook,), None)?;
let path_finder = frozen_importlib_external.get(py, "PathFinder")?;
let meta_path = sys_module.get(py, "meta_path")?;
meta_path.call_method(py, "append", (path_finder,), None)?;
}
// Ideally we should be calling Py_SetPath() before Py_Initialize() to set sys.path.
// But we tried to do this and only ran into problems due to string conversions,
// unwanted side-effects. Updating sys.path directly before it is used by PathFinder
// (which was just registered above) should have the same effect.
// Always clear out sys.path.
let sys_path = sys_module.get(py, "path")?;
sys_path.call_method(py, "clear", NoArgs, None)?;
// And repopulate it with entries from the config.
for path in &state.sys_paths {
let py_path = PyString::new(py, path.as_str());
sys_path.call_method(py, "append", (py_path,), None)?;
}
Ok(py.None())
}
/// Definition of the importer extension module handed to `PyModule_Create()`.
///
/// `m_name` and `m_doc` start out null and are filled in on first use by
/// `PyInit__pyoxidizer_importer()`. `m_size` reserves room for one `ModuleState`
/// per module instance.
static mut MODULE_DEF: pyffi::PyModuleDef = pyffi::PyModuleDef {
m_base: pyffi::PyModuleDef_HEAD_INIT,
m_name: std::ptr::null(),
m_doc: std::ptr::null(),
m_size: std::mem::size_of::<ModuleState>() as isize,
m_methods: 0 as *mut _,
m_slots: 0 as *mut _,
m_traverse: None,
m_clear: None,
m_free: None,
};
/// Module initialization function.
///
/// Creates the Python module object for the importer and runs `module_init()` on it.
/// On failure the Python error indicator is set and a null pointer is returned.
///
/// The cpython crate's module macros are deliberately not used: they are somewhat
/// opinionated about how things should work (e.g. they call PyEval_InitThreads(),
/// which is undesired). We want total control.
#[allow(non_snake_case)]
pub extern "C" fn PyInit__pyoxidizer_importer() -> *mut pyffi::PyObject {
    let py = unsafe { cpython::Python::assume_gil_acquired() };

    let raw_module = unsafe {
        // TRACKING RUST1.32 We can't call as_ptr() in const fn in Rust 1.31.
        // So the name/doc pointers are patched in lazily on the first call.
        if MODULE_DEF.m_name.is_null() {
            MODULE_DEF.m_name = PYOXIDIZER_IMPORTER_NAME.as_ptr() as *const _;
            MODULE_DEF.m_doc = DOC.as_ptr() as *const _;
        }

        pyffi::PyModule_Create(&mut MODULE_DEF)
    };

    if raw_module.is_null() {
        return raw_module;
    }

    let owned = unsafe { PyObject::from_owned_ptr(py, raw_module) };
    let module = match owned.cast_into::<PyModule>(py) {
        Ok(module) => module,
        Err(cast_err) => {
            PyErr::from(cast_err).restore(py);
            return std::ptr::null_mut();
        }
    };

    if let Err(init_err) = module_init(py, &module) {
        init_err.restore(py);
        return std::ptr::null_mut();
    }

    // Hand ownership of the module reference back to the interpreter.
    module.into_object().steal_ptr()
}

34
pyembed/src/lib.rs Normal file
View file

@ -0,0 +1,34 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
/*!
Manage an embedded Python interpreter.
The `pyembed` crate contains functionality for managing a Python interpreter
embedded in the current binary. This crate is typically used along with
[PyOxidizer](https://github.com/indygreg/PyOxidizer) for producing
self-contained binaries containing Python.
The most important types are [`PythonConfig`](struct.PythonConfig.html) and
[`MainPythonInterpreter`](struct.MainPythonInterpreter.html). A `PythonConfig`
defines how a Python interpreter is to behave. A `MainPythonInterpreter`
creates and manages that interpreter and serves as a high-level interface for
running code in the interpreter.
*/
mod config;
mod data;
mod importer;
mod pyalloc;
mod pyinterp;
mod pystr;
#[allow(unused_imports)]
pub use crate::config::PythonConfig;
#[allow(unused_imports)]
pub use crate::data::default_python_config;
#[allow(unused_imports)]
pub use crate::pyinterp::MainPythonInterpreter;

221
pyembed/src/pyalloc.rs Normal file
View file

@ -0,0 +1,221 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
//! Custom Python memory allocators.
#[cfg(feature = "jemalloc-sys")]
use jemalloc_sys as jemallocffi;
use libc::{c_void, size_t};
use python3_sys as pyffi;
use std::alloc;
use std::collections::HashMap;
#[cfg(feature = "jemalloc-sys")]
use std::ptr::null_mut;
/// Alignment, in bytes, requested for every allocation made by the Rust-backed
/// raw allocator.
const MIN_ALIGN: usize = 16;
/// Maps the address of each live allocation to the `Layout` it was allocated with.
type RawAllocatorState = HashMap<*mut u8, alloc::Layout>;
/// Holds state for the raw memory allocator.
///
/// Ideally we wouldn't need to track state. But Rust's dealloc() API
/// requires passing in a Layout that matches the allocation. This means
/// we need to track the Layout for each allocation. This data structure
/// facilitates that.
///
/// TODO HashMap isn't thread safe and the Python raw allocator doesn't
/// hold the GIL. So we need a thread safe map or a mutex guarding access.
pub struct RawAllocator {
/// Allocator vtable handed to Python; its `ctx` points at the map owned by `_state`.
pub allocator: pyffi::PyMemAllocatorEx,
/// Owns the tracking map so it stays alive as long as this struct does.
_state: Box<RawAllocatorState>,
}
/// `malloc` hook of the Rust-backed raw allocator.
///
/// `ctx` must point at the `RawAllocatorState` tracking map. Returns a pointer
/// aligned to `MIN_ALIGN`, or null if the request cannot be satisfied (including
/// sizes that cannot be expressed as a valid `Layout`).
extern "C" fn raw_rust_malloc(ctx: *mut c_void, size: size_t) -> *mut c_void {
    // PyMem_RawMalloc()'s docs say: Requesting zero bytes returns a distinct
    // non-NULL pointer if possible, as if PyMem_RawMalloc(1) had been called
    // instead.
    let size = match size {
        0 => 1,
        val => val,
    };

    // An oversized request must fail like malloc() does rather than build an
    // invalid Layout, which would be undefined behavior.
    let layout = match alloc::Layout::from_size_align(size, MIN_ALIGN) {
        Ok(layout) => layout,
        Err(_) => return std::ptr::null_mut(),
    };

    unsafe {
        let state = ctx as *mut RawAllocatorState;
        let res = alloc::alloc(layout);

        // Only successful allocations are tracked; a null result is just reported
        // to the caller.
        if !res.is_null() {
            (*state).insert(res, layout);
        }

        res as *mut c_void
    }
}
/// `calloc` hook of the Rust-backed raw allocator.
///
/// `ctx` must point at the `RawAllocatorState` tracking map. Returns zeroed memory
/// aligned to `MIN_ALIGN`, or null if `nelem * elsize` overflows or the request
/// cannot otherwise be satisfied.
extern "C" fn raw_rust_calloc(ctx: *mut c_void, nelem: size_t, elsize: size_t) -> *mut c_void {
    // PyMem_RawCalloc()'s docs say: Requesting zero elements or elements of
    // size zero bytes returns a distinct non-NULL pointer if possible, as if
    // PyMem_RawCalloc(1, 1) had been called instead.
    //
    // The multiplication is checked: a wrapped product would silently hand back a
    // buffer smaller than the caller asked for.
    let size = match nelem.checked_mul(elsize) {
        Some(0) => 1,
        Some(val) => val,
        None => return std::ptr::null_mut(),
    };

    let layout = match alloc::Layout::from_size_align(size, MIN_ALIGN) {
        Ok(layout) => layout,
        Err(_) => return std::ptr::null_mut(),
    };

    unsafe {
        let state = ctx as *mut RawAllocatorState;
        let res = alloc::alloc_zeroed(layout);

        // Only successful allocations are tracked.
        if !res.is_null() {
            (*state).insert(res, layout);
        }

        res as *mut c_void
    }
}
/// `realloc` hook of the Rust-backed raw allocator.
///
/// `ctx` must point at the `RawAllocatorState` tracking map and a non-null `ptr`
/// must have been returned by this allocator. Returns the resized block, or null
/// on failure, in which case the original block stays valid and stays tracked.
///
/// Panics if `ptr` is non-null but unknown to the tracking map.
extern "C" fn raw_rust_realloc(
    ctx: *mut c_void,
    ptr: *mut c_void,
    new_size: size_t,
) -> *mut c_void {
    // PyMem_RawRealloc()'s docs say: If p is NULL, the call is equivalent to
    // PyMem_RawMalloc(n); else if n is equal to zero, the memory block is
    // resized but is not freed, and the returned pointer is non-NULL.
    if ptr.is_null() {
        return raw_rust_malloc(ctx, new_size);
    }

    let new_size = match new_size {
        0 => 1,
        val => val,
    };

    // Validate the new size before touching any state so a rejected request leaves
    // the existing allocation and its record untouched.
    let layout = match alloc::Layout::from_size_align(new_size, MIN_ALIGN) {
        Ok(layout) => layout,
        Err(_) => return std::ptr::null_mut(),
    };

    unsafe {
        let state = ctx as *mut RawAllocatorState;
        let key = ptr as *mut u8;

        let old_layout = (*state)
            .remove(&key)
            .expect("original memory address not tracked");

        let res = alloc::realloc(key, old_layout, new_size);

        if res.is_null() {
            // realloc() failure leaves the original block allocated: keep tracking
            // it so a later free() or realloc() of `ptr` still finds its layout.
            (*state).insert(key, old_layout);
        } else {
            (*state).insert(res, layout);
        }

        res as *mut c_void
    }
}
/// `free` hook of the Rust-backed raw allocator.
///
/// `ctx` must point at the `RawAllocatorState` tracking map. A null `ptr` is a
/// no-op.
///
/// Panics if `ptr` is non-null but unknown to the tracking map.
extern "C" fn raw_rust_free(ctx: *mut c_void, ptr: *mut c_void) {
    if ptr.is_null() {
        return;
    }

    unsafe {
        let state = ctx as *mut RawAllocatorState;
        let key = ptr as *mut u8;

        // Take the record out in a single lookup. The panic message is only built
        // when the record is missing, so the hot path does no formatting/allocation.
        let layout = (*state)
            .remove(&key)
            .unwrap_or_else(|| panic!("could not find allocated memory record: {:?}", key));

        alloc::dealloc(key, layout);
    }
}
/// Builds a raw memory allocator for Python that is backed by Rust's allocator.
///
/// The returned `RawAllocator` bundles the allocator vtable with the tracking map
/// its `ctx` pointer refers to.
pub fn make_raw_rust_memory_allocator() -> RawAllocator {
    // The tracking map must live on the heap so the context pointer stays valid when
    // the returned struct is moved. The Box is taken apart to obtain that pointer and
    // put back together below so the struct owns (and eventually frees) the map.
    let state = Box::into_raw(Box::new(HashMap::<*mut u8, alloc::Layout>::new()));

    RawAllocator {
        allocator: pyffi::PyMemAllocatorEx {
            ctx: state as *mut c_void,
            malloc: Some(raw_rust_malloc),
            calloc: Some(raw_rust_calloc),
            realloc: Some(raw_rust_realloc),
            free: Some(raw_rust_free),
        },
        _state: unsafe { Box::from_raw(state) },
    }
}
// Now let's define a raw memory allocator that interfaces directly with jemalloc.
// This avoids the overhead of going through Rust's allocation layer.
/// `malloc` hook that forwards directly to jemalloc's `mallocx`.
#[cfg(feature = "jemalloc-sys")]
extern "C" fn raw_jemalloc_malloc(_ctx: *mut c_void, size: size_t) -> *mut c_void {
    // Per the PyMem_RawMalloc() docs, a zero byte request should yield a distinct
    // non-NULL pointer if possible, as if one byte had been requested.
    let effective_size = if size == 0 { 1 } else { size };

    unsafe { jemallocffi::mallocx(effective_size, 0) }
}
/// `calloc` hook that forwards directly to jemalloc's `mallocx` with zeroing.
///
/// Returns null if `nelem * elsize` overflows.
#[cfg(feature = "jemalloc-sys")]
extern "C" fn raw_jemalloc_calloc(_ctx: *mut c_void, nelem: size_t, elsize: size_t) -> *mut c_void {
    // PyMem_RawCalloc()'s docs say: Requesting zero elements or elements of
    // size zero bytes returns a distinct non-NULL pointer if possible, as if
    // PyMem_RawCalloc(1, 1) had been called instead.
    //
    // The multiplication is checked: a wrapped product would silently hand back a
    // buffer smaller than the caller asked for.
    let size = match nelem.checked_mul(elsize) {
        Some(0) => 1,
        Some(val) => val,
        None => return std::ptr::null_mut(),
    };

    unsafe { jemallocffi::mallocx(size, jemallocffi::MALLOCX_ZERO) }
}
/// `realloc` hook that forwards directly to jemalloc's `rallocx`.
#[cfg(feature = "jemalloc-sys")]
extern "C" fn raw_jemalloc_realloc(
    ctx: *mut c_void,
    ptr: *mut c_void,
    new_size: size_t,
) -> *mut c_void {
    // Per the PyMem_RawRealloc() docs: a NULL pointer makes the call equivalent to
    // PyMem_RawMalloc(n); a zero size resizes (but does not free) the block and the
    // returned pointer is non-NULL.
    if !ptr.is_null() {
        let effective_size = if new_size == 0 { 1 } else { new_size };

        unsafe { jemallocffi::rallocx(ptr, effective_size, 0) }
    } else {
        raw_jemalloc_malloc(ctx, new_size)
    }
}
/// `free` hook that forwards directly to jemalloc's `dallocx`; null is ignored.
#[cfg(feature = "jemalloc-sys")]
extern "C" fn raw_jemalloc_free(_ctx: *mut c_void, ptr: *mut c_void) {
    if !ptr.is_null() {
        unsafe { jemallocffi::dallocx(ptr, 0) }
    }
}
/// Builds the allocator vtable that routes Python raw allocations to jemalloc.
///
/// The jemalloc hooks ignore their context argument, so `ctx` is left null.
#[cfg(feature = "jemalloc-sys")]
pub fn make_raw_jemalloc_allocator() -> pyffi::PyMemAllocatorEx {
pyffi::PyMemAllocatorEx {
ctx: null_mut(),
malloc: Some(raw_jemalloc_malloc),
calloc: Some(raw_jemalloc_calloc),
realloc: Some(raw_jemalloc_realloc),
free: Some(raw_jemalloc_free),
}
}

776
pyembed/src/pyinterp.rs Normal file
View file

@ -0,0 +1,776 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
//! Manage an embedded Python interpreter.
use libc::c_char;
use python3_sys as pyffi;
use std::collections::BTreeSet;
use std::env;
use std::ffi::CString;
use std::fs;
use std::io::Write;
use std::path::PathBuf;
use std::ptr::null;
use cpython::exc::ValueError;
use cpython::{
GILGuard, NoArgs, ObjectProtocol, PyClone, PyDict, PyErr, PyList, PyModule, PyObject, PyResult,
Python, PythonObject, ToPyObject,
};
use super::config::{PythonConfig, PythonRawAllocator, PythonRunMode};
use super::importer::PyInit__pyoxidizer_importer;
#[cfg(feature = "jemalloc-sys")]
use super::pyalloc::make_raw_jemalloc_allocator;
use super::pyalloc::{make_raw_rust_memory_allocator, RawAllocator};
use super::pystr::{osstring_to_bytes, osstring_to_str, OwnedPyStr};
/// NUL-terminated name of the importer extension module; its pointer is passed
/// to C APIs expecting a `char*`.
pub const PYOXIDIZER_IMPORTER_NAME: &[u8] = b"_pyoxidizer_importer\0";
/// NUL-terminated name used for the first custom frozen module entry.
const FROZEN_IMPORTLIB_NAME: &[u8] = b"_frozen_importlib\0";
/// NUL-terminated name used for the second custom frozen module entry.
const FROZEN_IMPORTLIB_EXTERNAL_NAME: &[u8] = b"_frozen_importlib_external\0";
/// Represents the results of executing Python code with exception handling.
///
/// `Ok` and `Err` are empty struct-like variants and carry no payload.
#[derive(Debug)]
pub enum PythonRunResult {
/// Code executed without raising an exception.
Ok {},
/// Code executed and raised an exception.
Err {},
/// Code executed and raised SystemExit with the specified exit code.
Exit { code: i32 },
}
fn make_custom_frozen_modules(config: &PythonConfig) -> [pyffi::_frozen; 3] {
[
pyffi::_frozen {
name: FROZEN_IMPORTLIB_NAME.as_ptr() as *const i8,
code: config.frozen_importlib_data.as_ptr(),
size: config.frozen_importlib_data.len() as i32,
},
pyffi::_frozen {
name: FROZEN_IMPORTLIB_EXTERNAL_NAME.as_ptr() as *const i8,
code: config.frozen_importlib_external_data.as_ptr(),
size: config.frozen_importlib_external_data.len() as i32,
},
pyffi::_frozen {
name: null(),
code: null(),
size: 0,
},
]
}
// Internal Microsoft C runtime function; called below with index 0 to obtain the
// stdin `FILE*` and with index 2 to obtain the stderr `FILE*`.
#[cfg(windows)]
extern "C" {
pub fn __acrt_iob_func(x: u32) -> *mut libc::FILE;
}
/// Returns the C runtime `FILE*` for standard input (Windows implementation).
#[cfg(windows)]
fn stdin_to_file() -> *mut libc::FILE {
// The stdin symbol is made available by importing <stdio.h>. On Windows,
// stdin is defined in corecrt_wstdio.h as a `#define` that calls this
// internal CRT function. There's no exported symbol to use. So we
// emulate the behavior of the C code.
//
// Relying on an internal CRT symbol is probably wrong. But Microsoft
// typically keeps backwards compatibility for undocumented functions
// like this because people use them in the wild.
//
// An attempt was made to use fdopen(0) like we do on POSIX. However,
// this causes a crash. The Microsoft C Runtime is already bending over
// backwards to coerce its native HANDLEs into POSIX file descriptors.
// Even if there are other ways to coerce a FILE* from a HANDLE
// (_open_osfhandle() + _fdopen() might work), using the same function
// that <stdio.h> uses to obtain a FILE* seems like the least risky thing
// to do.
unsafe { __acrt_iob_func(0) }
}
/// Returns a C `FILE*` opened for reading on the standard input descriptor
/// (POSIX implementation).
///
/// The result is whatever `fdopen()` returns, so it is null on failure.
#[cfg(unix)]
fn stdin_to_file() -> *mut libc::FILE {
    // fdopen() expects the mode as a NUL-terminated C string. A pointer to a lone
    // character is not terminated and makes fdopen() read past it.
    unsafe { libc::fdopen(libc::STDIN_FILENO, b"r\0".as_ptr() as *const libc::c_char) }
}
/// Returns the C runtime `FILE*` for standard error (Windows implementation).
///
/// Uses the same internal CRT accessor as `stdin_to_file()`, with index 2.
#[cfg(windows)]
fn stderr_to_file() -> *mut libc::FILE {
unsafe { __acrt_iob_func(2) }
}
/// Returns a C `FILE*` opened for writing on the standard error descriptor
/// (POSIX implementation).
///
/// The result is whatever `fdopen()` returns, so it is null on failure.
#[cfg(unix)]
fn stderr_to_file() -> *mut libc::FILE {
    // fdopen() expects the mode as a NUL-terminated C string. A pointer to a lone
    // character is not terminated and makes fdopen() read past it.
    unsafe { libc::fdopen(libc::STDERR_FILENO, b"w\0".as_ptr() as *const libc::c_char) }
}
/// Returns the jemalloc-backed raw allocator (build with the `jemalloc-sys` feature).
#[cfg(feature = "jemalloc-sys")]
fn raw_jemallocator() -> pyffi::PyMemAllocatorEx {
make_raw_jemalloc_allocator()
}
/// Stand-in used when the `jemalloc-sys` feature is disabled.
///
/// Always panics: requesting jemalloc in such a build is a configuration error.
#[cfg(not(feature = "jemalloc-sys"))]
fn raw_jemallocator() -> pyffi::PyMemAllocatorEx {
panic!("jemalloc is not available in this build configuration");
}
/// Manages an embedded Python interpreter.
///
/// **Warning: Python interpreters have global state. There should only be a
/// single instance of this type per process.**
///
/// Instances must only be constructed through [`MainPythonInterpreter::new()`](#method.new).
///
/// This type and its various functionality is a glorified wrapper around the
/// Python C API. But there's a lot of added functionality on top of what the C
/// API provides.
///
/// Both the low-level `python3-sys` and higher-level `cpython` crates are used.
pub struct MainPythonInterpreter<'a> {
/// Configuration the interpreter was created from.
pub config: PythonConfig,
/// Custom frozen module table; `init()` points `PyImport_FrozenModules` at it
/// when `config.use_custom_importlib` is set.
// NOTE(review): the interpreter is handed a pointer into this inline array, yet
// `new()` moves the struct when returning it after `init()` — confirm the pointer
// cannot dangle.
frozen_modules: [pyffi::_frozen; 3],
/// Whether `init()` has already run; checked at the top of `init()`.
init_run: bool,
/// Raw allocator vtable, populated when jemalloc is the configured raw allocator.
raw_allocator: Option<pyffi::PyMemAllocatorEx>,
/// Rust-backed raw allocator and its tracking state, populated when Rust is the
/// configured raw allocator.
raw_rust_allocator: Option<RawAllocator>,
/// Starts as `None`; presumably holds the GIL guard once acquired — assignment
/// is not visible here, TODO confirm.
gil: Option<GILGuard>,
/// Starts as `None`; presumably the Python handle tied to the held GIL — TODO confirm.
py: Option<Python<'a>>,
/// Starts as `None`; presumably keeps the program name string alive for the
/// interpreter — TODO confirm.
program_name: Option<OwnedPyStr>,
}
impl<'a> MainPythonInterpreter<'a> {
/// Construct a Python interpreter from a configuration.
///
/// Creating the value also initializes the Python interpreter as a side-effect,
/// and the GIL is held afterwards. Any error reported by initialization is
/// returned to the caller.
pub fn new(config: PythonConfig) -> Result<MainPythonInterpreter<'a>, &'static str> {
    // At most one of the two allocator slots is populated, depending on which raw
    // allocator the configuration selects.
    let mut raw_allocator = None;
    let mut raw_rust_allocator = None;
    match config.raw_allocator {
        PythonRawAllocator::Jemalloc => raw_allocator = Some(raw_jemallocator()),
        PythonRawAllocator::Rust => raw_rust_allocator = Some(make_raw_rust_memory_allocator()),
        PythonRawAllocator::System => {}
    }

    // Built before `config` is moved into the struct below.
    let frozen_modules = make_custom_frozen_modules(&config);

    let mut interpreter = MainPythonInterpreter {
        config,
        frozen_modules,
        init_run: false,
        raw_allocator,
        raw_rust_allocator,
        gil: None,
        py: None,
        program_name: None,
    };

    interpreter.init()?;

    Ok(interpreter)
}
/// Initialize the interpreter.
///
/// This mutates global state in the Python interpreter according to the
/// bound config and initializes the Python interpreter.
///
/// After this is called, the embedded Python interpreter is ready to
/// execute custom code.
///
/// If called more than once, the function is a no-op from the perspective
/// of interpreter initialization.
///
/// Returns a Python instance which has the GIL acquired.
fn init(&mut self) -> Result<Python, &'static str> {
// Already initialized: just hand back a handle with the GIL acquired.
if self.init_run {
return Ok(self.acquire_gil());
}
let config = &self.config;
// Resolve the directory containing the running executable. It is substituted
// for the "$ORIGIN" placeholder in every configured sys.path entry.
let exe = env::current_exe().or_else(|_| Err("could not obtain current exe"))?;
let origin = exe
.parent()
.ok_or_else(|| "unable to get exe parent")?
.display()
.to_string();
let sys_paths: Vec<String> = config
.sys_paths
.iter()
.map(|path| path.replace("$ORIGIN", &origin))
.collect();
// TODO should we call PyMem::SetupDebugHooks() if enabled?
// Install a custom allocator for the raw memory domain when one is present.
// `raw_allocator` takes precedence over `raw_rust_allocator`; if neither is
// set, no allocator is installed here.
if let Some(raw_allocator) = &self.raw_allocator {
unsafe {
let ptr = raw_allocator as *const _;
pyffi::PyMem_SetAllocator(
pyffi::PyMemAllocatorDomain::PYMEM_DOMAIN_RAW,
ptr as *mut _,
);
}
} else if let Some(raw_rust_allocator) = &self.raw_rust_allocator {
unsafe {
let ptr = &raw_rust_allocator.allocator as *const _;
pyffi::PyMem_SetAllocator(
pyffi::PyMemAllocatorDomain::PYMEM_DOMAIN_RAW,
ptr as *mut _,
);
}
}
// Module state is a bit wonky.
//
// Our in-memory importer relies on a special module which holds references
// to Python objects exposing module/resource data. This module is imported as
// part of initializing the Python interpreter.
//
// This Python module object needs to hold references to the raw Python module
// and resource data. Those references are defined by the InitModuleState struct.
//
// Unfortunately, we can't easily associate state with the interpreter before
// calling Py_Initialize(). And the module initialization function receives no
// arguments. Our solution is to update a global pointer to point at "our" state
// then call Py_Initialize(). The module will be initialized as part of calling
// Py_Initialize(). It will copy the contents at the pointer into the local
// module state and the global pointer will be unused after that. The end result
// is that we have no reliance on global variables outside of a short window
// between now and when Py_Initialize() is called.
//
// We could potentially do away with this global variable by using a closure for
// the initialization function. But this rabbit hole may involve gross hackery
// like dynamic module names. It probably isn't worth it.
// It is important for references in this struct to have a lifetime of at least
// that of the interpreter.
// TODO specify lifetimes so the compiler validates this for us.
let module_state = super::importer::InitModuleState {
register_filesystem_importer: self.config.filesystem_importer,
sys_paths,
py_modules_data: config.py_modules_data,
py_resources_data: config.py_resources_data,
};
if config.use_custom_importlib {
// Replace the frozen modules in the interpreter with our custom set
// that knows how to import from memory.
unsafe {
pyffi::PyImport_FrozenModules = self.frozen_modules.as_ptr();
}
// Register our _pyoxidizer_importer extension which provides importing functionality.
unsafe {
// name char* needs to live as long as the interpreter is active.
pyffi::PyImport_AppendInittab(
PYOXIDIZER_IMPORTER_NAME.as_ptr() as *const i8,
Some(PyInit__pyoxidizer_importer),
);
// Move pointer to our stack allocated instance. This pointer will be
// accessed when creating the Python module object, which should be
// done automatically as part of low-level interpreter initialization
// when calling Py_Initialize() below.
// NOTE(review): every early return (`?` / `return Err`) between here and
// the reset after Py_Initialize() leaves this global pointing at a stack
// value that no longer exists — confirm nothing reads it on those paths.
super::importer::NEXT_MODULE_STATE = &module_state;
}
}
// NOTE(review): the Python home is set to the executable path itself, not to
// `origin` (its parent directory) — confirm this is intended.
// NOTE(review): `home` is a local and is dropped when this function returns,
// which frees its buffer (see OwnedPyStr's Drop impl), yet the comment below
// says the pointer must outlive the interpreter. Confirm whether the targeted
// CPython version copies the string in Py_SetPythonHome().
let home =
OwnedPyStr::from_str(exe.to_str().ok_or_else(|| "unable to convert exe to str")?)?;
unsafe {
// Pointer needs to live for lifetime of interpreter.
pyffi::Py_SetPythonHome(home.as_wchar_ptr());
}
let program_name = OwnedPyStr::from_str(config.program_name.as_str())?;
unsafe {
pyffi::Py_SetProgramName(program_name.as_wchar_ptr());
}
// Value needs to live for lifetime of interpreter.
self.program_name = Some(program_name);
// If we don't call Py_SetPath(), Python has its own logic for initializing it.
// We set it to an empty string because we don't want any paths by default. If
// we do have defined paths, they will be set after Py_Initialize().
unsafe {
// Value is copied internally. So short lifetime is OK.
let value = OwnedPyStr::from_str("")?;
pyffi::Py_SetPath(value.as_wchar_ptr());
}
// The standard stream encoding is only applied when BOTH the encoding and
// the error mode are configured.
if let (Some(ref encoding), Some(ref errors)) =
(&config.standard_io_encoding, &config.standard_io_errors)
{
let cencoding = CString::new(encoding.clone())
.or_else(|_| Err("unable to convert encoding to C string"))?;
let cerrors = CString::new(errors.clone())
.or_else(|_| Err("unable to convert encoding error mode to C string"))?;
let res = unsafe {
pyffi::Py_SetStandardStreamEncoding(
cencoding.as_ptr() as *const i8,
cerrors.as_ptr() as *const i8,
)
};
if res != 0 {
return Err("unable to set standard stream encoding");
}
}
// Translate config values into CPython's global flags. Note the inverted
// sense of the two "No*" flags relative to the `import_*` config fields.
unsafe {
pyffi::Py_DontWriteBytecodeFlag = if config.dont_write_bytecode { 1 } else { 0 };
pyffi::Py_IgnoreEnvironmentFlag = if config.ignore_python_env { 1 } else { 0 };
pyffi::Py_NoSiteFlag = if config.import_site { 0 } else { 1 };
pyffi::Py_NoUserSiteDirectory = if config.import_user_site { 0 } else { 1 };
pyffi::Py_OptimizeFlag = config.opt_level;
pyffi::Py_UnbufferedStdioFlag = if config.unbuffered_stdio { 1 } else { 0 };
}
/* Pre-initialization functions we could support:
*
* PyObject_SetArenaAllocator()
* PySys_AddWarnOption()
* PySys_AddXOption()
* PySys_ResetWarnOptions()
*/
unsafe {
pyffi::Py_Initialize();
}
// We shouldn't be accessing this pointer after Py_Initialize(). And the
// memory is stack allocated and doesn't outlive this frame. We don't want
// to leave a stack pointer sitting around!
unsafe {
super::importer::NEXT_MODULE_STATE = std::ptr::null();
}
// NOTE(review): assumes the calling thread holds the GIL once Py_Initialize()
// has returned — confirm. No GILGuard is stored in `self.gil` on this path.
let py = unsafe { Python::assume_gil_acquired() };
self.py = Some(py);
// Marked as initialized before sys.argv etc. are populated, so a failure
// below is not retried by a later call to init().
self.init_run = true;
// env::args() panics if arguments aren't valid Unicode. But invalid
// Unicode arguments are possible and some applications may want to
// support them.
//
// env::args_os() provides access to the raw OsString instances, which
// will be derived from wchar_t on Windows and char* on POSIX. We can
// convert these to Python str instances using a platform-specific
// mechanism.
let args_objs = env::args_os()
.map(|os_arg| osstring_to_str(py, os_arg))
.collect::<Result<Vec<PyObject>, &'static str>>()?;
// This will steal the pointer to the elements and mem::forget them.
let args = PyList::new(py, &args_objs);
let argv = b"argv\0";
let res = args.with_borrowed_ptr(py, |args_ptr| unsafe {
pyffi::PySys_SetObject(argv.as_ptr() as *const i8, args_ptr)
});
match res {
0 => (),
_ => return Err("unable to set sys.argv"),
}
// Optionally expose the process arguments as bytes objects via sys.argvb.
if config.argvb {
let args_objs: Vec<PyObject> = env::args_os()
.map(|os_arg| osstring_to_bytes(py, os_arg))
.collect();
let args = PyList::new(py, &args_objs);
let argvb = b"argvb\0";
let res = args.with_borrowed_ptr(py, |args_ptr| unsafe {
pyffi::PySys_SetObject(argvb.as_ptr() as *const i8, args_ptr)
});
match res {
0 => (),
_ => return Err("unable to set sys.argvb"),
}
}
// As a convention, sys.oxidized is set to indicate we are running from
// a self-contained application.
let oxidized = b"oxidized\0";
let res = py.True().with_borrowed_ptr(py, |py_true| unsafe {
pyffi::PySys_SetObject(oxidized.as_ptr() as *const i8, py_true)
});
match res {
0 => (),
_ => return Err("unable to set sys.oxidized"),
}
Ok(py)
}
/// Drop the stored interpreter handle and GIL guard, if a handle is held.
///
/// When no handle is stored this does nothing and leaves `gil` untouched.
pub fn release_gil(&mut self) {
    // `take()` clears the handle and tells us whether one was present.
    if self.py.take().is_some() {
        self.gil = None;
    }
}
/// Return a handle on the interpreter, acquiring the GIL first if no handle
/// is currently stored.
pub fn acquire_gil(&mut self) -> Python<'a> {
    // Fast path: a handle is already cached.
    if let Some(py) = self.py {
        return py;
    }

    let guard = GILGuard::acquire();
    let py = unsafe { Python::assume_gil_acquired() };
    // Keep the guard alive alongside the handle until release_gil() is called.
    self.gil = Some(guard);
    self.py = Some(py);
    py
}
/// Execute whatever the configuration's run mode asks for.
///
/// The run mode comes from `self.config.run`: nothing, a REPL, a module run
/// as `__main__`, or a string of code to evaluate.
pub fn run(&mut self) -> PyResult<PyObject> {
    // Work on an owned copy of the run mode so `self` can be mutably
    // borrowed by the calls below.
    let mode = self.config.run.clone();
    let py = self.acquire_gil();

    match mode {
        PythonRunMode::Eval { code } => self.run_code(&code),
        PythonRunMode::Module { module } => self.run_module_as_main(&module),
        PythonRunMode::Repl => self.run_repl(),
        PythonRunMode::None => Ok(py.None()),
    }
}
/// Resolve a raised SystemExit exception into a process exit code.
///
/// This emulates the behavior in pythonrun.c:handle_system_exit() and
/// _Py_HandleSystemExit() but without the call to exit(), which we don't want.
///
/// Returns 0 when the exception carries no value (or `None`), the integer
/// value when it carries an int, and 1 after writing any other value to
/// stderr.
fn handle_system_exit(&mut self, py: Python, err: PyErr) -> Result<i32, &'static str> {
    std::io::stdout()
        .flush()
        .map_err(|_| "failed to flush stdout")?;

    // No value at all, or a value of None, both mean a successful exit.
    let mut exit_value = match err.pvalue {
        Some(ref instance) if instance.as_ptr() != py.None().as_ptr() => instance.clone_ref(py),
        _ => return Ok(0),
    };

    if unsafe { pyffi::PyExceptionInstance_Check(exit_value.as_ptr()) } != 0 {
        // The error code should be in the "code" attribute.
        if let Ok(code) = exit_value.getattr(py, "code") {
            if code == py.None() {
                return Ok(0);
            }

            // Otherwise treat exc_value.code as the value to report and
            // carry on below.
            exit_value = code;
        }
    }

    if unsafe { pyffi::PyLong_Check(exit_value.as_ptr()) } != 0 {
        let code = unsafe { pyffi::PyLong_AsLong(exit_value.as_ptr()) };
        return Ok(code as i32);
    }

    let sys_module = py
        .import("sys")
        .map_err(|_| "unable to obtain sys module")?;
    let stderr = sys_module.get(py, "stderr");

    // This is a cargo cult from the canonical implementation.
    unsafe {
        pyffi::PyErr_Clear();
    }

    if let Ok(stream) = stderr {
        unsafe {
            pyffi::PyFile_WriteObject(exit_value.as_ptr(), stream.as_ptr(), pyffi::Py_PRINT_RAW);
        }
    } else {
        // sys.stderr is unavailable: print straight to the C-level stderr.
        unsafe {
            pyffi::PyObject_Print(exit_value.as_ptr(), stderr_to_file(), pyffi::Py_PRINT_RAW);
        }
        std::io::stderr()
            .flush()
            .map_err(|_| "failure to flush stderr")?;
    }

    unsafe {
        pyffi::PySys_WriteStderr(b"\n\0".as_ptr() as *const i8);
    }

    // This frees references to this exception, which may be necessary to avoid
    // badness.
    err.restore(py);
    unsafe {
        pyffi::PyErr_Clear();
    }

    Ok(1)
}
/// Run the configured code and turn any raised exception into a result value.
///
/// SystemExit is converted into `PythonRunResult::Exit` with an exit code;
/// any other exception is printed and reported as `PythonRunResult::Err`.
pub fn run_and_handle_error(&mut self) -> PythonRunResult {
    // There are underdefined lifetime bugs at play here. The PyObject
    // returned by run() has no explicit lifetime, so the result is kept in a
    // local for the whole function. Otherwise drop() on self could run
    // before the PyObject's drop(), which tries to acquire the GIL and may
    // segfault if the interpreter is already shut down.
    // TODO look into setting lifetimes properly so the compiler can
    // prevent some issues.
    let outcome = self.run();
    let py = self.acquire_gil();

    let raised = match outcome {
        Ok(_) => return PythonRunResult::Ok {},
        Err(err) => err,
    };

    // SystemExit is special in that PyErr_PrintEx() will call exit() if it
    // is seen. So, we handle it manually so we can return an exit code
    // instead of exiting.
    // TODO surely the cpython crate offers a better way to do this...
    raised.restore(py);
    let is_system_exit =
        unsafe { pyffi::PyErr_ExceptionMatches(pyffi::PyExc_SystemExit) } != 0;
    let raised = cpython::PyErr::fetch(py);

    if !is_system_exit {
        self.print_err(raised);
        return PythonRunResult::Err {};
    }

    let code = match self.handle_system_exit(py, raised) {
        Ok(code) => code,
        Err(msg) => {
            eprintln!("{}", msg);
            1
        }
    };

    PythonRunResult::Exit { code }
}
/// Run the configured code and map the outcome to a process exit code.
///
/// Success maps to 0, an unhandled exception to 1, and an explicit exit
/// request to the code it carries.
pub fn run_as_main(&mut self) -> i32 {
    let outcome = self.run_and_handle_error();

    match outcome {
        PythonRunResult::Exit { code } => code,
        PythonRunResult::Err {} => 1,
        PythonRunResult::Ok {} => 0,
    }
}
/// Runs a Python module as the __main__ module.
///
/// Returns the execution result of the module code.
///
/// The GIL is acquired if it is not already held.
///
/// # Errors
///
/// If the module cannot be located or loaded, or if its code raises, the
/// Python exception is returned to the caller *unprinted*. The caller decides
/// how to report it (`run_and_handle_error()` prints it, and converts
/// SystemExit into an exit code instead of terminating the process).
pub fn run_module_as_main(&mut self, name: &str) -> PyResult<PyObject> {
    let py = self.acquire_gil();

    // This is modeled after runpy.py:_run_module_as_main().
    let main_ptr =
        unsafe { pyffi::PyImport_AddModule("__main__\0".as_ptr() as *const c_char) };
    // A null pointer signals failure with an exception set; it must not be
    // wrapped in a PyObject.
    if main_ptr.is_null() {
        return Err(PyErr::fetch(py));
    }
    let main: PyModule = unsafe { PyObject::from_borrowed_ptr(py, main_ptr) }.cast_into(py)?;
    let main_dict = main.dict(py);

    let importlib_util = py.import("importlib.util")?;
    let spec = importlib_util.call(py, "find_spec", (name,), None)?;
    let loader = spec.getattr(py, "loader")?;
    let code = loader.call_method(py, "get_code", (name,), None)?;
    let origin = spec.getattr(py, "origin")?;
    let cached = spec.getattr(py, "cached")?;

    // TODO handle __package__.
    main_dict.set_item(py, "__name__", "__main__")?;
    main_dict.set_item(py, "__file__", origin)?;
    main_dict.set_item(py, "__cached__", cached)?;
    main_dict.set_item(py, "__doc__", py.None())?;
    main_dict.set_item(py, "__loader__", loader)?;
    main_dict.set_item(py, "__spec__", spec)?;

    let res = unsafe {
        let globals = main_dict.as_object().as_ptr();
        pyffi::PyEval_EvalCode(code.as_ptr(), globals, globals)
    };

    if res.is_null() {
        // Hand the raised exception back exactly once. Printing it here would
        // clear the error indicator (so a second fetch would yield a
        // placeholder error) and would make PyErr_PrintEx() exit the process
        // on SystemExit before the caller can compute an exit code.
        Err(PyErr::fetch(py))
    } else {
        Ok(unsafe { PyObject::from_owned_ptr(py, res) })
    }
}
/// Run an interactive Python session reading from stdin.
///
/// This emulates what CPython's main.c does.
///
/// The GIL is acquired if it is not already held. Returns Python `None`
/// once the interactive session ends.
pub fn run_repl(&mut self) -> PyResult<PyObject> {
    let py = self.acquire_gil();

    unsafe {
        pyffi::Py_InspectFlag = 0;
    }

    // readline is optional, so the outcome of the import is deliberately
    // discarded.
    let _ = py.import("readline");

    let sys = py.import("sys")?;
    // Call the interactive hook only when sys defines one.
    if let Ok(hook) = sys.get(py, "__interactivehook__") {
        hook.call(py, NoArgs, None)?;
    }

    let filename = CString::new("<stdin>")
        .map_err(|_| PyErr::new::<ValueError, _>(py, "could not create CString"))?;
    let mut flags = pyffi::PyCompilerFlags { cf_flags: 0 };

    // TODO use return value.
    unsafe {
        pyffi::PyRun_AnyFileExFlags(
            stdin_to_file(),
            filename.as_ptr() as *const c_char,
            0,
            &mut flags,
        );
    }

    Ok(py.None())
}
/// Execute a string of Python source in the `__main__` module's namespace.
///
/// This is similar to what ``python -c <code>`` would do.
///
/// The GIL is acquired if it is not already held. A ValueError is returned
/// when `code` cannot be converted to a C string.
pub fn run_code(&mut self, code: &str) -> PyResult<PyObject> {
    let py = self.acquire_gil();

    let source = CString::new(code).map_err(|_| {
        PyErr::new::<ValueError, _>(py, "source code is not a valid C string")
    })?;

    let main_module = unsafe { pyffi::PyImport_AddModule("__main__\0".as_ptr() as *const _) };
    if main_module.is_null() {
        return Err(PyErr::fetch(py));
    }

    // The module's dict serves as both globals and locals.
    let result = unsafe {
        let namespace = pyffi::PyModule_GetDict(main_module);
        pyffi::PyRun_StringFlags(
            source.as_ptr() as *const _,
            pyffi::Py_file_input,
            namespace,
            namespace,
            std::ptr::null_mut(),
        )
    };

    if result.is_null() {
        return Err(PyErr::fetch(py));
    }

    Ok(unsafe { PyObject::from_owned_ptr(py, result) })
}
/// Print a Python error, acquiring the GIL first if needed.
///
/// Under the hood this calls ``PyErr_PrintEx()``, which may call
/// ``Py_Exit()`` and may write to stderr.
pub fn print_err(&mut self, err: PyErr) {
    err.print(self.acquire_gil());
}
}
/// Dump the names of all loaded Python modules into a file under `path`.
///
/// The directory is created if necessary. The file is named
/// ``modules-<UUID>`` and holds the keys of ``sys.modules`` in sorted order,
/// one per line, each terminated by ``\n``.
fn write_modules_to_directory(py: Python, path: &PathBuf) -> Result<(), &'static str> {
    // TODO this needs better error handling all over.
    fs::create_dir_all(path).map_err(|_| "could not create directory for modules")?;

    // A random UUID keeps each run's output file distinct.
    let file_name = format!("modules-{}", uuid::Uuid::new_v4());
    let target = path.join(file_name);

    let sys = py
        .import("sys")
        .map_err(|_| "could not obtain sys module")?;
    let modules_obj = sys
        .get(py, "modules")
        .map_err(|_| "could not obtain sys.modules")?;
    let modules = modules_obj
        .cast_as::<PyDict>(py)
        .map_err(|_| "sys.modules is not a dict")?;

    // A BTreeSet gives de-duplicated, sorted names; the first key that is
    // not a str aborts the whole operation.
    let names = modules
        .items(py)
        .into_iter()
        .map(|(key, _value)| {
            key.extract::<String>(py)
                .map_err(|_| "module name is not a str")
        })
        .collect::<Result<BTreeSet<String>, &'static str>>()?;

    let mut out = fs::File::create(target).map_err(|_| "could not open file for writing")?;
    for name in names {
        out.write_fmt(format_args!("{}\n", name))
            .map_err(|_| "could not write")?;
    }

    Ok(())
}
impl<'a> Drop for MainPythonInterpreter<'a> {
    /// Optionally record the loaded modules, then finalize the interpreter.
    ///
    /// When the config names an environment variable in
    /// `write_modules_directory_env` and that variable is set, its value is
    /// used as the directory to write the loaded module list into. Failures
    /// are reported on stderr and do not prevent finalization.
    fn drop(&mut self) {
        let modules_dir = self
            .config
            .write_modules_directory_env
            .as_ref()
            .and_then(|key| env::var(key).ok())
            .map(PathBuf::from);

        if let Some(dir) = modules_dir {
            let py = self.acquire_gil();
            if let Err(msg) = write_modules_to_directory(py, &dir) {
                eprintln!("error writing modules file: {}", msg);
            }
        }

        // The finalization status is intentionally ignored.
        let _ = unsafe { pyffi::Py_FinalizeEx() };
    }
}

98
pyembed/src/pystr.rs Normal file
View file

@ -0,0 +1,98 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
//! Bridge Rust and Python string types.
use libc::{c_void, size_t, wchar_t};
use python3_sys as pyffi;
use std::ffi::{CString, OsString};
use std::ptr::null_mut;
#[cfg(target_family = "unix")]
use std::os::unix::ffi::OsStrExt;
#[cfg(target_family = "windows")]
use std::os::windows::prelude::OsStrExt;
use cpython::{PyObject, Python};
/// An owned, wide character string produced by `Py_DecodeLocale()`.
///
/// The buffer is released with `PyMem_RawFree()` when the value is dropped,
/// so any pointer obtained from `as_wchar_ptr()` is only valid while this
/// value is alive.
#[derive(Debug)]
pub struct OwnedPyStr {
// Pointer to the decoded buffer; freed in the Drop impl.
data: *const wchar_t,
}
impl OwnedPyStr {
    /// Expose the underlying wide character pointer without giving up ownership.
    pub fn as_wchar_ptr(&self) -> *const wchar_t {
        self.data
    }

    /// Decode a Rust string into a wide character string via `Py_DecodeLocale()`.
    ///
    /// Fails when `s` contains an interior NUL byte or when decoding returns
    /// a null pointer.
    pub fn from_str(s: &str) -> Result<Self, &'static str> {
        // Go through a C string so there is a terminal NULL; otherwise
        // Py_DecodeLocale() can get confused.
        let c_source = CString::new(s).map_err(|_| "source string has NULL bytes")?;

        // A null size pointer is passed: the decoded length is not needed.
        let decoded = unsafe { pyffi::Py_DecodeLocale(c_source.as_ptr(), null_mut()) };
        if decoded.is_null() {
            return Err("could not convert str to Python string");
        }

        Ok(OwnedPyStr { data: decoded })
    }
}
impl Drop for OwnedPyStr {
    /// Return the decoded buffer to Python's raw allocator.
    fn drop(&mut self) {
        let raw = self.data as *mut c_void;
        unsafe { pyffi::PyMem_RawFree(raw) }
    }
}
// NUL-terminated name of the error handler passed to
// PyUnicode_DecodeLocaleAndSize() when decoding OS strings on Unix.
#[cfg(target_family = "unix")]
const SURROGATEESCAPE: &[u8] = b"surrogateescape\0";
/// Convert an OS string into a Python `str` object (Unix).
///
/// The bytes are decoded with `PyUnicode_DecodeLocaleAndSize()` using the
/// `surrogateescape` error handler.
///
/// # Errors
///
/// Returns an error when `s` contains an interior NUL byte or when Python
/// fails to decode it. In the latter case the pending Python exception is
/// cleared, since the failure is reported through the returned message.
#[cfg(target_family = "unix")]
pub fn osstring_to_str(py: Python, s: OsString) -> Result<PyObject, &'static str> {
    // PyUnicode_DecodeLocaleAndSize says the input must have a trailing NULL.
    // So use a CString for that.
    let b = CString::new(s.as_bytes()).map_err(|_| "not a valid C string")?;
    unsafe {
        // CString::as_ptr() already yields a `*const c_char`; the handler
        // name's pointer type is inferred so nothing assumes a signed char.
        let o = pyffi::PyUnicode_DecodeLocaleAndSize(
            b.as_ptr(),
            b.to_bytes().len() as isize,
            SURROGATEESCAPE.as_ptr() as *const _,
        );
        // A null result signals failure with an exception set; it must not
        // be wrapped in a PyObject.
        if o.is_null() {
            pyffi::PyErr_Clear();
            return Err("unable to decode OS string to Python str");
        }
        Ok(PyObject::from_owned_ptr(py, o))
    }
}
/// Convert an OS string into a Python `str` object (Windows).
///
/// The string's UTF-16 code units are passed to `PyUnicode_FromWideChar()`.
///
/// # Errors
///
/// Returns an error when Python fails to build the string. The pending
/// Python exception is cleared, since the failure is reported through the
/// returned message.
#[cfg(target_family = "windows")]
pub fn osstring_to_str(py: Python, s: OsString) -> Result<PyObject, &'static str> {
    // Windows OsString should be valid UTF-16.
    let w: Vec<u16> = s.encode_wide().collect();
    unsafe {
        let o = pyffi::PyUnicode_FromWideChar(w.as_ptr(), w.len() as isize);
        // A null result signals failure with an exception set; it must not
        // be wrapped in a PyObject.
        if o.is_null() {
            pyffi::PyErr_Clear();
            return Err("unable to convert OS string to Python str");
        }
        Ok(PyObject::from_owned_ptr(py, o))
    }
}
/// Build a Python `bytes` object holding the raw bytes of an OS string (Unix).
#[cfg(target_family = "unix")]
pub fn osstring_to_bytes(py: Python, s: OsString) -> PyObject {
    let raw = s.as_bytes();
    let data = raw.as_ptr() as *const i8;
    let len = raw.len() as isize;

    unsafe { PyObject::from_owned_ptr(py, pyffi::PyBytes_FromStringAndSize(data, len)) }
}
/// Build a Python `bytes` object holding the UTF-16 code units of an OS
/// string (Windows).
#[cfg(target_family = "windows")]
pub fn osstring_to_bytes(py: Python, s: OsString) -> PyObject {
    let units: Vec<u16> = s.encode_wide().collect();
    let data = units.as_ptr() as *const i8;
    // Each UTF-16 code unit occupies two bytes.
    let byte_len = units.len() as isize * 2;

    unsafe { PyObject::from_owned_ptr(py, pyffi::PyBytes_FromStringAndSize(data, byte_len)) }
}

119
pyoxidizer.toml Normal file
View file

@ -0,0 +1,119 @@
# This file controls the PyOxidizer build configuration. See the
# pyoxidizer crate's documentation for extensive documentation
# on this file format.
[[build]]
application_name = "insee_translator"
[[embedded_python_config]]
raw_allocator = "jemalloc"
# dont_write_bytecode = true
# ignore_environment = true
# no_site = true
# no_user_site_directory = true
# optimize_level = 0
# stdio_encoding = "utf-8:strict"
# unbuffered_stdio = false
#write_modules_directory_env = "PYOXIDIZER_WRITE_MODULES_DIR"
# Windows doesn't support jemalloc.
[[embedded_python_config]]
build_target = "x86_64-pc-windows-msvc"
raw_allocator = "system"
[[packaging_rule]]
type = "stdlib-extensions-policy"
# Package all available extension modules from the Python distribution.
# The Python interpreter will be fully featured.
policy = "all"
# Only package the minimal set of extension modules needed to initialize
# a Python interpreter. Many common packages in Python's standard
# library won't work with this setting.
# policy = "minimal"
# Only package extension modules that don't require linking against
# non-Python libraries. e.g. will exclude support for OpenSSL, SQLite3,
# other features that require external libraries.
# policy = "no-libraries"
# Explicit list of extension modules from the distribution to include.
# [[packaging_rule]]
# type = "stdlib-extensions-explicit-includes"
# includes = ["binascii", "errno", "itertools", "math", "select", "_socket"]
# Explicit list of extension modules from the distribution to exclude.
# [[packaging_rule]]
# type = "stdlib-extensions-explicit-excludes"
# excludes = ["_ssl"]
# Package the entire Python standard library without sources.
[[packaging_rule]]
type = "stdlib"
include_source = false
# Write out license files next to the produced binary.
[[packaging_rule]]
type = "write-license-files"
path = ""
# Package .py files discovered in a local directory.
[[packaging_rule]]
type = "package-root"
path = "."
packages = ["data", "main"]
# Package things from a populated virtualenv.
# [[packaging_rule]]
# type = "virtualenv"
# path = "/path/to/venv"
# Filter all resources collected so far through a filter of names
# in a file.
# [[packaging_rule]]
# type = "filter-include"
# files = ["/path/to/filter-file"]
# How Python should run by default. This is only needed if you
# call ``run()``. For applications customizing how the embedded
# Python interpreter is invoked, this section is not relevant.
[[embedded_python_run]]
# Run an interactive Python interpreter.
#mode = "repl"
# Import a Python module and run it.
mode = "module"
module = "main.main"
# Evaluate some Python code.
#mode = "eval"
#code = "import main; main.main()"
# END OF COMMON USER-ADJUSTED SETTINGS.
#
# Everything below this is typically managed by PyOxidizer and doesn't need
# to be updated by people.
[[python_distribution]]
build_target = "x86_64-apple-darwin"
url = "https://github.com/indygreg/python-build-standalone/releases/download/20190617/cpython-3.7.3-macos-20190618T0523.tar.zst"
sha256 = "6668202a3225892ce252eff4bb53a58ac058b6a413ab9d37c026a500c2a561ee"
[[python_distribution]]
build_target = "x86_64-pc-windows-msvc"
url = "https://github.com/indygreg/python-build-standalone/releases/download/20190617/cpython-3.7.3-windows-amd64-20190618T0516.tar.zst"
sha256 = "fd43554b5654a914846cf1c251d1ad366f46c7c4d20b7c44572251b533351221"
[[python_distribution]]
build_target = "x86_64-unknown-linux-gnu"
url = "https://github.com/indygreg/python-build-standalone/releases/download/20190617/cpython-3.7.3-linux64-20190618T0324.tar.zst"
sha256 = "d6b80a9723c124d6d193f8816fdb874ba6d56abfb35cbfcc2b27de53176d0620"
[[python_distribution]]
build_target = "x86_64-unknown-linux-musl"
url = "https://github.com/indygreg/python-build-standalone/releases/download/20190617/cpython-3.7.3-linux64-musl-20190618T0400.tar.zst"
sha256 = "2be2d109b82634b36685b89800887501b619ef946dda182e5a8ab5c7029a8136"
[[pyoxidizer]]
version = "0.2.0"
commit = ""

30
src/main.rs Normal file
View file

@ -0,0 +1,30 @@
use pyembed::{default_python_config, MainPythonInterpreter};
/// Entry point: run the embedded Python interpreter and exit with its result.
fn main() {
    // The interpreter lives inside this block so that it is dropped in an
    // orderly manner before the process exits.
    let exit_code = {
        // Configuration derived from the PyOxidizer config file used at
        // build time.
        let config = default_python_config();

        match MainPythonInterpreter::new(config) {
            // Construction failed: report why and exit with a failure code.
            Err(msg) => {
                eprintln!("{}", msg);
                1
            }
            // Run using the default run configuration. Uncaught Python
            // exceptions are handled, including the special SystemExit,
            // which is a request to terminate the process.
            Ok(mut interp) => interp.run_as_main(),
        }
    };

    // Exit the process according to the code execution results.
    std::process::exit(exit_code);
}