Cleanup project

This commit is contained in:
Gabriel Augendre 2021-08-06 15:37:45 +02:00
parent 72cd87010d
commit aa4e102cae
28 changed files with 270 additions and 2854 deletions

33
.pre-commit-config.yaml Normal file
View file

@ -0,0 +1,33 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.0.1
hooks:
- id: check-ast
types: [python]
- id: check-json
types: [json]
- id: check-toml
types: [toml]
- id: check-xml
types: [xml]
- id: check-yaml
types: [yaml]
- id: end-of-file-fixer
- id: check-merge-conflict
- id: pretty-format-json
args:
- --autofix
- --no-sort-keys
- id: trailing-whitespace
args:
- --markdown-linebreak-ext=md
- repo: https://github.com/timothycrosley/isort
rev: 5.9.2
hooks:
- id: isort
types: [python]
- repo: https://github.com/psf/black
rev: 21.6b0
hooks:
- id: black
types: [python]

351
Cargo.lock generated
View file

@ -1,351 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "aho-corasick"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "autocfg"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bitflags"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cc"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cfg-if"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cloudabi"
version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "cpython"
version = "0.2.1"
source = "git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0#2093cc5ab9b29d7db2255a0df836d89e440754b6"
dependencies = [
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
"python3-sys 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)",
]
[[package]]
name = "fs_extra"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "insee_translator"
version = "0.1.0"
dependencies = [
"jemallocator-global 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"pyembed 0.2.0",
]
[[package]]
name = "jemalloc-sys"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)",
"fs_extra 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "jemallocator"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"jemalloc-sys 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "jemallocator-global"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"jemallocator 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lazy_static"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "libc"
version = "0.2.59"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "memchr"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "num-traits"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "pyembed"
version = "0.2.0"
dependencies = [
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"cpython 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)",
"jemalloc-sys 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
"python3-sys 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)",
"uuid 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "python3-sys"
version = "0.2.1"
source = "git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0#2093cc5ab9b29d7db2255a0df836d89e440754b6"
dependencies = [
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_jitter 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_chacha"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_core"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_core"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "rand_hc"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_isaac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_jitter"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_os"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_pcg"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_xorshift"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rdrand"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex"
version = "1.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "thread_local"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ucd-util"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "utf8-ranges"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "uuid"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "winapi"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum aho-corasick 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)" = "36b7aa1ccb7d7ea3f437cf025a2ab1c47cc6c1bc9fc84918ff449def12f5e282"
"checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf"
"checksum bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd"
"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
"checksum cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)" = "39f75544d7bbaf57560d2168f28fd649ff9c76153874db88bdbdfd839b1a7e7d"
"checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum cpython 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)" = "<none>"
"checksum fs_extra 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5f2a4a2034423744d2cc7ca2068453168dcdb82c438419e639a26bd87839c674"
"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
"checksum jemalloc-sys 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0d3b9f3f5c9b31aa0f5ed3260385ac205db665baa41d49bb8338008ae94ede45"
"checksum jemallocator 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "43ae63fcfc45e99ab3d1b29a46782ad679e98436c3169d15a167a1108a724b69"
"checksum jemallocator-global 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "991b61de8365c8b5707cf6cabbff98cfd6eaca9b851948b883efea408c7f581e"
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
"checksum libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)" = "3262021842bf00fe07dbd6cf34ff25c99d7a7ebef8deea84db72be3ea3bb0aff"
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
"checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32"
"checksum python3-sys 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)" = "<none>"
"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
"checksum rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef"
"checksum rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
"checksum rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0e7a549d590831370895ab7ba4ea0c1b6b011d106b5ff2da6eee112615e6dc0"
"checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4"
"checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08"
"checksum rand_jitter 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b"
"checksum rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071"
"checksum rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44"
"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
"checksum regex 1.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "d9d8297cc20bbb6184f8b45ff61c8ee6a9ac56c156cec8e38c3e5084773c44ad"
"checksum regex-syntax 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "9b01330cce219c1c6b2e209e5ed64ccd587ae5c67bed91c0b49eecf02ae40e21"
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86"
"checksum utf8-ranges 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "9d50aa7650df78abf942826607c62468ce18d9019673d4a2ebe1865dbb96ffde"
"checksum uuid 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)" = "90dbc611eb48397705a6b0f6e917da23ae517e4d127123d2cf7674206627d32a"
"checksum winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f10e386af2b13e47c89e7236a7a14a086791a2b88ebad6df9bf42040195cf770"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View file

@ -1,15 +0,0 @@
[package]
name = "insee_translator"
version = "0.1.0"
authors = ["Gabriel Augendre <gabriel@augendre.info>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
jemallocator-global = { version = "0.3", optional = true }
pyembed = { path = "pyembed" }
[features]
default = []
jemalloc = ["jemallocator-global", "pyembed/jemalloc"]

15
Pipfile
View file

@ -1,15 +0,0 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true
[dev-packages]
[packages]
black = "*"
[requires]
python_version = "3.7"
[pipenv]
allow_prereleases = true

57
Pipfile.lock generated
View file

@ -1,57 +0,0 @@
{
"_meta": {
"hash": {
"sha256": "b132de3bc6e041e3fa5ab7a0feb2ee862f488ae8903790188641b70b5e595abd"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.7"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"appdirs": {
"hashes": [
"sha256:9e5896d1372858f8dd3344faf4e5014d21849c756c8d5701f78f8a103b372d92",
"sha256:d8b24664561d0d34ddfaec54636d502d7cea6e29c3eaf68f3df6180863e2166e"
],
"version": "==1.4.3"
},
"attrs": {
"hashes": [
"sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79",
"sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399"
],
"version": "==19.1.0"
},
"black": {
"hashes": [
"sha256:09a9dcb7c46ed496a9850b76e4e825d6049ecd38b611f1224857a79bd985a8cf",
"sha256:68950ffd4d9169716bcb8719a56c07a2f4485354fec061cdd5910aa07369731c"
],
"index": "pypi",
"version": "==19.3b0"
},
"click": {
"hashes": [
"sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13",
"sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7"
],
"version": "==7.0"
},
"toml": {
"hashes": [
"sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c",
"sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e"
],
"version": "==0.10.0"
}
},
"develop": {}
}

View file

@ -2,6 +2,16 @@
Extract data from INSEE number (France)
## Getting started
```shell
pyenv virtualenv 3.9.6 insee
pyenv local insee
poetry install
python insee_number_translator/main.py
python insee_number_translator/main.py 123456789123456
```
## Data sources
- cities : https://public.opendatasoft.com/explore/dataset/correspondance-code-insee-code-postal/export/

View file

View file

@ -1,29 +0,0 @@
# This expects a file named "correspondance-code-insee-code-postal.json"
# You can find one here : https://public.opendatasoft.com/explore/dataset/correspondance-code-insee-code-postal/export/
#
# Builds a mapping {INSEE commune code: {"name": ..., "zip_code": ...}} from the
# dataset export and writes its pretty-printed form to ../data/cities.py.
import json
import pprint

# The export contains accented city names; read it as UTF-8 regardless of the
# platform's default encoding.
with open("correspondance-code-insee-code-postal.json", "r", encoding="utf-8") as f:
    data = json.load(f)

mapping = {}
# Records that could not be used (no "fields" object or no "insee_com" code).
failed = []
# Records whose INSEE code was already seen; the later record wins in `mapping`.
duplicates = []

for base_item in data:
    item = base_item.get("fields")
    if not item:
        failed.append(base_item)
        # Nothing to read from this record; without this `continue` the
        # `item.get` below would raise AttributeError on None.
        continue

    insee_com = item.get("insee_com")
    if not insee_com:
        failed.append(base_item)
        continue

    if insee_com in mapping:
        duplicates.append(base_item)
    mapping[insee_com] = {
        "name": item.get("nom_comm"),
        "zip_code": item.get("postal_code"),
    }

# Leaving the `with` block flushes and closes the file.
with open("../data/cities.py", "w", encoding="utf-8") as writef:
    writef.write(pprint.pformat(mapping, indent=4, compact=True))

View file

@ -3,9 +3,9 @@ import datetime
import pprint import pprint
import sys import sys
from data.departments import DEPARTMENTS
from data.cities import CITIES from data.cities import CITIES
from data.countries import COUNTRIES, CONTINENTS from data.countries import CONTINENTS, COUNTRIES
from data.departments import DEPARTMENTS
class InseeData: class InseeData:
@ -159,8 +159,8 @@ def main():
data = InseeData(number) data = InseeData(number)
print(data) print(data)
pprint.pprint(data.to_dict()) pprint.pprint(data.to_dict())
print('\n\n') print("\n\n")
if __name__ == '__main__': if __name__ == "__main__":
main() main()

View file

207
poetry.lock generated Normal file
View file

@ -0,0 +1,207 @@
[[package]]
name = "backports.entry-points-selectable"
version = "1.1.0"
description = "Compatibility shim providing selectable entry points for older implementations"
category = "dev"
optional = false
python-versions = ">=2.7"
[package.extras]
docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"]
testing = ["pytest (>=4.6)", "pytest-flake8", "pytest-cov", "pytest-black (>=0.3.7)", "pytest-mypy", "pytest-checkdocs (>=2.4)", "pytest-enabler (>=1.0.1)"]
[[package]]
name = "cfgv"
version = "3.3.0"
description = "Validate configuration and produce human readable error messages."
category = "dev"
optional = false
python-versions = ">=3.6.1"
[[package]]
name = "distlib"
version = "0.3.2"
description = "Distribution utilities"
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "filelock"
version = "3.0.12"
description = "A platform independent file lock."
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "identify"
version = "2.2.12"
description = "File identification library for Python"
category = "dev"
optional = false
python-versions = ">=3.6.1"
[package.extras]
license = ["editdistance-s"]
[[package]]
name = "nodeenv"
version = "1.6.0"
description = "Node.js virtual environment builder"
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "platformdirs"
version = "2.2.0"
description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
category = "dev"
optional = false
python-versions = ">=3.6"
[package.extras]
docs = ["Sphinx (>=4)", "furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)"]
test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)"]
[[package]]
name = "pre-commit"
version = "2.13.0"
description = "A framework for managing and maintaining multi-language pre-commit hooks."
category = "dev"
optional = false
python-versions = ">=3.6.1"
[package.dependencies]
cfgv = ">=2.0.0"
identify = ">=1.0.0"
nodeenv = ">=0.11.1"
pyyaml = ">=5.1"
toml = "*"
virtualenv = ">=20.0.8"
[[package]]
name = "pyyaml"
version = "5.4.1"
description = "YAML parser and emitter for Python"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
[[package]]
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "toml"
version = "0.10.2"
description = "Python Library for Tom's Obvious, Minimal Language"
category = "dev"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "virtualenv"
version = "20.7.0"
description = "Virtual Python Environment builder"
category = "dev"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
[package.dependencies]
"backports.entry-points-selectable" = ">=1.0.4"
distlib = ">=0.3.1,<1"
filelock = ">=3.0.0,<4"
platformdirs = ">=2,<3"
six = ">=1.9.0,<2"
[package.extras]
docs = ["proselint (>=0.10.2)", "sphinx (>=3)", "sphinx-argparse (>=0.2.5)", "sphinx-rtd-theme (>=0.4.3)", "towncrier (>=19.9.0rc1)"]
testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", "pytest (>=4)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.1)", "pytest-mock (>=2)", "pytest-randomly (>=1)", "pytest-timeout (>=1)", "packaging (>=20.0)"]
[metadata]
lock-version = "1.1"
python-versions = "^3.9"
content-hash = "c1cc0f0c13c0a4f97f0629dcf8460e2022e73b90e77bd99d1a3370815da0a11a"
[metadata.files]
"backports.entry-points-selectable" = [
{file = "backports.entry_points_selectable-1.1.0-py2.py3-none-any.whl", hash = "sha256:a6d9a871cde5e15b4c4a53e3d43ba890cc6861ec1332c9c2428c92f977192acc"},
{file = "backports.entry_points_selectable-1.1.0.tar.gz", hash = "sha256:988468260ec1c196dab6ae1149260e2f5472c9110334e5d51adcb77867361f6a"},
]
cfgv = [
{file = "cfgv-3.3.0-py2.py3-none-any.whl", hash = "sha256:b449c9c6118fe8cca7fa5e00b9ec60ba08145d281d52164230a69211c5d597a1"},
{file = "cfgv-3.3.0.tar.gz", hash = "sha256:9e600479b3b99e8af981ecdfc80a0296104ee610cab48a5ae4ffd0b668650eb1"},
]
distlib = [
{file = "distlib-0.3.2-py2.py3-none-any.whl", hash = "sha256:23e223426b28491b1ced97dc3bbe183027419dfc7982b4fa2f05d5f3ff10711c"},
{file = "distlib-0.3.2.zip", hash = "sha256:106fef6dc37dd8c0e2c0a60d3fca3e77460a48907f335fa28420463a6f799736"},
]
filelock = [
{file = "filelock-3.0.12-py3-none-any.whl", hash = "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"},
{file = "filelock-3.0.12.tar.gz", hash = "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59"},
]
identify = [
{file = "identify-2.2.12-py2.py3-none-any.whl", hash = "sha256:a510cbe155f39665625c8a4c4b4f9360cbce539f51f23f47836ab7dd852db541"},
{file = "identify-2.2.12.tar.gz", hash = "sha256:242332b3bdd45a8af1752d5d5a3afb12bee26f8e67c4be06e394f82d05ef1a4d"},
]
nodeenv = [
{file = "nodeenv-1.6.0-py2.py3-none-any.whl", hash = "sha256:621e6b7076565ddcacd2db0294c0381e01fd28945ab36bcf00f41c5daf63bef7"},
{file = "nodeenv-1.6.0.tar.gz", hash = "sha256:3ef13ff90291ba2a4a7a4ff9a979b63ffdd00a464dbe04acf0ea6471517a4c2b"},
]
platformdirs = [
{file = "platformdirs-2.2.0-py3-none-any.whl", hash = "sha256:4666d822218db6a262bdfdc9c39d21f23b4cfdb08af331a81e92751daf6c866c"},
{file = "platformdirs-2.2.0.tar.gz", hash = "sha256:632daad3ab546bd8e6af0537d09805cec458dce201bccfe23012df73332e181e"},
]
pre-commit = [
{file = "pre_commit-2.13.0-py2.py3-none-any.whl", hash = "sha256:b679d0fddd5b9d6d98783ae5f10fd0c4c59954f375b70a58cbe1ce9bcf9809a4"},
{file = "pre_commit-2.13.0.tar.gz", hash = "sha256:764972c60693dc668ba8e86eb29654ec3144501310f7198742a767bec385a378"},
]
pyyaml = [
{file = "PyYAML-5.4.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:3b2b1824fe7112845700f815ff6a489360226a5609b96ec2190a45e62a9fc922"},
{file = "PyYAML-5.4.1-cp27-cp27m-win32.whl", hash = "sha256:129def1b7c1bf22faffd67b8f3724645203b79d8f4cc81f674654d9902cb4393"},
{file = "PyYAML-5.4.1-cp27-cp27m-win_amd64.whl", hash = "sha256:4465124ef1b18d9ace298060f4eccc64b0850899ac4ac53294547536533800c8"},
{file = "PyYAML-5.4.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:bb4191dfc9306777bc594117aee052446b3fa88737cd13b7188d0e7aa8162185"},
{file = "PyYAML-5.4.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:6c78645d400265a062508ae399b60b8c167bf003db364ecb26dcab2bda048253"},
{file = "PyYAML-5.4.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:4e0583d24c881e14342eaf4ec5fbc97f934b999a6828693a99157fde912540cc"},
{file = "PyYAML-5.4.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:72a01f726a9c7851ca9bfad6fd09ca4e090a023c00945ea05ba1638c09dc3347"},
{file = "PyYAML-5.4.1-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:895f61ef02e8fed38159bb70f7e100e00f471eae2bc838cd0f4ebb21e28f8541"},
{file = "PyYAML-5.4.1-cp36-cp36m-win32.whl", hash = "sha256:3bd0e463264cf257d1ffd2e40223b197271046d09dadf73a0fe82b9c1fc385a5"},
{file = "PyYAML-5.4.1-cp36-cp36m-win_amd64.whl", hash = "sha256:e4fac90784481d221a8e4b1162afa7c47ed953be40d31ab4629ae917510051df"},
{file = "PyYAML-5.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5accb17103e43963b80e6f837831f38d314a0495500067cb25afab2e8d7a4018"},
{file = "PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e1d4970ea66be07ae37a3c2e48b5ec63f7ba6804bdddfdbd3cfd954d25a82e63"},
{file = "PyYAML-5.4.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:cb333c16912324fd5f769fff6bc5de372e9e7a202247b48870bc251ed40239aa"},
{file = "PyYAML-5.4.1-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:fe69978f3f768926cfa37b867e3843918e012cf83f680806599ddce33c2c68b0"},
{file = "PyYAML-5.4.1-cp37-cp37m-win32.whl", hash = "sha256:dd5de0646207f053eb0d6c74ae45ba98c3395a571a2891858e87df7c9b9bd51b"},
{file = "PyYAML-5.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:08682f6b72c722394747bddaf0aa62277e02557c0fd1c42cb853016a38f8dedf"},
{file = "PyYAML-5.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d2d9808ea7b4af864f35ea216be506ecec180628aced0704e34aca0b040ffe46"},
{file = "PyYAML-5.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8c1be557ee92a20f184922c7b6424e8ab6691788e6d86137c5d93c1a6ec1b8fb"},
{file = "PyYAML-5.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:fd7f6999a8070df521b6384004ef42833b9bd62cfee11a09bda1079b4b704247"},
{file = "PyYAML-5.4.1-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:bfb51918d4ff3d77c1c856a9699f8492c612cde32fd3bcd344af9be34999bfdc"},
{file = "PyYAML-5.4.1-cp38-cp38-win32.whl", hash = "sha256:fa5ae20527d8e831e8230cbffd9f8fe952815b2b7dae6ffec25318803a7528fc"},
{file = "PyYAML-5.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:0f5f5786c0e09baddcd8b4b45f20a7b5d61a7e7e99846e3c799b05c7c53fa696"},
{file = "PyYAML-5.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:294db365efa064d00b8d1ef65d8ea2c3426ac366c0c4368d930bf1c5fb497f77"},
{file = "PyYAML-5.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:74c1485f7707cf707a7aef42ef6322b8f97921bd89be2ab6317fd782c2d53183"},
{file = "PyYAML-5.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:d483ad4e639292c90170eb6f7783ad19490e7a8defb3e46f97dfe4bacae89122"},
{file = "PyYAML-5.4.1-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:fdc842473cd33f45ff6bce46aea678a54e3d21f1b61a7750ce3c498eedfe25d6"},
{file = "PyYAML-5.4.1-cp39-cp39-win32.whl", hash = "sha256:49d4cdd9065b9b6e206d0595fee27a96b5dd22618e7520c33204a4a3239d5b10"},
{file = "PyYAML-5.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:c20cfa2d49991c8b4147af39859b167664f2ad4561704ee74c1de03318e898db"},
{file = "PyYAML-5.4.1.tar.gz", hash = "sha256:607774cbba28732bfa802b54baa7484215f530991055bb562efbed5b2f20a45e"},
]
six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
toml = [
{file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
{file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
]
virtualenv = [
{file = "virtualenv-20.7.0-py2.py3-none-any.whl", hash = "sha256:fdfdaaf0979ac03ae7f76d5224a05b58165f3c804f8aa633f3dd6f22fbd435d5"},
{file = "virtualenv-20.7.0.tar.gz", hash = "sha256:97066a978431ec096d163e72771df5357c5c898ffdd587048f45e0aecc228094"},
]

View file

@ -1,25 +0,0 @@
[package]
name = "pyembed"
version = "0.2.0"
authors = ["Gregory Szorc <gregory.szorc@gmail.com>"]
edition = "2018"
build = "build.rs"
[dependencies]
byteorder = "1"
jemalloc-sys = { version = "0.3", optional = true }
libc = "0.2"
uuid = { version = "0.7", features = ["v4"] }
[dependencies.python3-sys]
git = "https://github.com/indygreg/PyOxidizer.git"
tag = "v0.2.0"
[dependencies.cpython]
git = "https://github.com/indygreg/PyOxidizer.git"
tag = "v0.2.0"
features = ["link-mode-unresolved-static", "python3-sys", "no-auto-initialize"]
[features]
default = []
jemalloc = ["jemalloc-sys"]

View file

@ -1,65 +0,0 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
use std::env;
use std::path::PathBuf;
use std::process;
/// Path to pyoxidizer executable this file was created with.
const DEFAULT_PYOXIDIZER_EXE: &str = r#"/Users/gaugendre/.cargo/bin/pyoxidizer"#;

/// Build-script entry point.
///
/// Behaviour is selected through the environment:
///
/// * `PYOXIDIZER_REUSE_ARTIFACTS` set: read `cargo_metadata.txt` from
///   `PYOXIDIZER_ARTIFACT_DIR` (falling back to `OUT_DIR`) and print its
///   contents to stdout.
/// * otherwise: run `<pyoxidizer> run-build-script build.rs`, where the
///   executable comes from `PYOXIDIZER_EXE` or [`DEFAULT_PYOXIDIZER_EXE`].
///
/// # Panics
///
/// Panics when the metadata file cannot be read, when the pyoxidizer
/// executable does not exist, or when running it fails or exits unsuccessfully.
fn main() {
    // We support using pre-built artifacts, in which case we emit the
    // cargo metadata lines from the "original" build to "register" the
    // artifacts with this cargo invocation.
    if env::var("PYOXIDIZER_REUSE_ARTIFACTS").is_ok() {
        let artifact_dir_path = env::var("PYOXIDIZER_ARTIFACT_DIR")
            .map(PathBuf::from)
            .unwrap_or_else(|_| {
                PathBuf::from(
                    env::var("OUT_DIR")
                        .expect("OUT_DIR must be set when PYOXIDIZER_ARTIFACT_DIR is not"),
                )
            });

        println!(
            "using pre-built artifacts from {}",
            artifact_dir_path.display()
        );

        println!("cargo:rerun-if-env-changed=PYOXIDIZER_REUSE_ARTIFACTS");
        println!("cargo:rerun-if-env-changed=PYOXIDIZER_ARTIFACT_DIR");

        // Emit the cargo metadata lines to register libraries for linking.
        let cargo_metadata_path = artifact_dir_path.join("cargo_metadata.txt");
        // Build the panic message lazily: it is only needed on failure.
        let metadata = std::fs::read_to_string(&cargo_metadata_path).unwrap_or_else(|e| {
            panic!("failed to read {}: {:?}", cargo_metadata_path.display(), e)
        });
        println!("{}", metadata);
    } else {
        let pyoxidizer_exe =
            env::var("PYOXIDIZER_EXE").unwrap_or_else(|_| DEFAULT_PYOXIDIZER_EXE.to_string());

        if !PathBuf::from(&pyoxidizer_exe).exists() {
            panic!("pyoxidizer executable does not exist: {}", &pyoxidizer_exe);
        }

        match process::Command::new(&pyoxidizer_exe)
            .arg("run-build-script")
            .arg("build.rs")
            .status()
        {
            Ok(status) if status.success() => {}
            Ok(_) => panic!("`pyoxidizer run-build-script` failed"),
            Err(e) => panic!("`pyoxidizer run-build-script` failed: {}", e),
        }
    }
}

View file

@ -1,118 +0,0 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
//! Data structures for configuring a Python interpreter.
/// Defines which allocator to use for the raw domain.
///
/// The enum carries no data, so it is `Copy` and can be compared with `==`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PythonRawAllocator {
    /// Use jemalloc.
    Jemalloc,
    /// Use the Rust global allocator.
    Rust,
    /// Use the system allocator.
    System,
}
/// Defines Python code to run.
///
/// Values can be compared with `==`; the `Module` and `Eval` variants compare
/// their string payloads.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PythonRunMode {
    /// No-op.
    None,
    /// Run a Python REPL.
    Repl,
    /// Run a Python module as the main module.
    Module { module: String },
    /// Evaluate Python code from a string.
    Eval { code: String },
}
/// Holds the configuration of an embedded Python interpreter.
///
/// Instances of this struct can be used to construct Python interpreters.
///
/// Each instance contains the total state to define the run-time behavior of
/// a Python interpreter.
///
/// The embedded byte blobs (`frozen_importlib_data`,
/// `frozen_importlib_external_data`, `py_modules_data`, `py_resources_data`)
/// are borrowed with a `'static` lifetime, so cloning a config does not copy
/// them.
#[derive(Clone, Debug)]
pub struct PythonConfig {
/// Name of the current program to tell to Python.
pub program_name: String,
/// Name of encoding for stdio handles.
pub standard_io_encoding: Option<String>,
/// Name of encoding error mode for stdio handles.
pub standard_io_errors: Option<String>,
/// Python optimization level.
pub opt_level: i32,
/// Whether to load our custom frozen importlib bootstrap modules.
pub use_custom_importlib: bool,
/// Whether to load the filesystem-based sys.meta_path finder.
pub filesystem_importer: bool,
/// Filesystem paths to add to sys.path.
///
/// ``$ORIGIN`` will resolve to the directory of the application at
/// run-time.
pub sys_paths: Vec<String>,
/// Whether to load the site.py module at initialization time.
pub import_site: bool,
/// Whether to load a user-specific site module at initialization time.
pub import_user_site: bool,
/// Whether to ignore various PYTHON* environment variables.
pub ignore_python_env: bool,
/// Whether to suppress writing of ``.pyc`` files when importing ``.py``
/// files from the filesystem. This is typically irrelevant since modules
/// are imported from memory.
pub dont_write_bytecode: bool,
/// Whether stdout and stderr streams should be unbuffered.
pub unbuffered_stdio: bool,
/// Bytecode for the importlib._bootstrap / _frozen_importlib module.
pub frozen_importlib_data: &'static [u8],
/// Bytecode for the importlib._bootstrap_external / _frozen_importlib_external module.
pub frozen_importlib_external_data: &'static [u8],
/// Reference to raw Python modules data.
///
/// The referenced data is produced as part of PyOxidizer packaging. This
/// likely comes from an include_bytes!(...) of a file generated by PyOxidizer.
pub py_modules_data: &'static [u8],
/// Reference to raw Python resources data.
///
/// The referenced data is produced as part of PyOxidizer packaging. This
/// likely comes from an include_bytes!(...) of a file generated by PyOxidizer.
pub py_resources_data: &'static [u8],
/// Whether to set sys.argvb with bytes versions of process arguments.
///
/// On Windows, bytes will be UTF-16. On POSIX, bytes will be raw char*
/// values passed to `int main()`.
pub argvb: bool,
/// Which memory allocator to use for the raw domain.
pub raw_allocator: PythonRawAllocator,
/// Environment variable holding the directory to write a loaded modules file.
///
/// If this value is set and the environment variable it refers to is set,
/// on interpreter shutdown, we will write a ``modules-<random>`` file to
/// the directory specified containing a ``\n`` delimited list of modules
/// loaded in ``sys.modules``.
pub write_modules_directory_env: Option<String>,
/// Defines what code to run by default.
pub run: PythonRunMode,
}

View file

@ -1,5 +0,0 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
// Textually include the Rust source file whose path is given by the
// compile-time environment variable `PYEMBED_DATA_RS_PATH`; the build fails
// if the variable is not set when this crate is compiled.
// NOTE(review): presumably that file defines `default_python_config`, which
// lib.rs re-exports from this module — confirm against the generated file.
include!(env!("PYEMBED_DATA_RS_PATH"));

View file

@ -1,892 +0,0 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
/*!
Functionality for a Python importer.
This module defines a Python meta path importer and associated functionality
for importing Python modules from memory.
*/
use std::cell::RefCell;
use std::collections::{HashMap, HashSet};
use std::ffi::CStr;
use std::io::Cursor;
use std::sync::Arc;
use byteorder::{LittleEndian, ReadBytesExt};
use cpython::exc::{FileNotFoundError, ImportError, RuntimeError, ValueError};
use cpython::{
py_class, py_class_impl, py_coerce_item, py_fn, NoArgs, ObjectProtocol, PyClone, PyDict, PyErr,
PyList, PyModule, PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
};
use python3_sys as pyffi;
use python3_sys::{PyBUF_READ, PyMemoryView_FromMemory};
use super::pyinterp::PYOXIDIZER_IMPORTER_NAME;
/// Wrap a static byte slice in a read-only Python `memoryview`.
///
/// The view refers to the slice's memory directly, so Python can read the
/// bytes without a copy being made. Returns `None` when
/// `PyMemoryView_FromMemory` hands back a null pointer.
#[inline]
fn get_memory_view(py: Python, data: &'static [u8]) -> Option<PyObject> {
    let start = data.as_ptr();
    let length = data.len();

    unsafe {
        let view = PyMemoryView_FromMemory(start as _, length as _, PyBUF_READ);
        PyObject::from_owned_ptr_opt(py, view)
    }
}
/// Borrowed source and bytecode blobs for a single Python module.
///
/// Either entry is `None` when the module has no data of that kind.
#[derive(Debug)]
struct PythonModuleData {
    source: Option<&'static [u8]>,
    bytecode: Option<&'static [u8]>,
}

impl PythonModuleData {
    /// Build a memoryview over the module's source, if it has any.
    fn get_source_memory_view(&self, py: Python) -> Option<PyObject> {
        self.source.and_then(|blob| get_memory_view(py, blob))
    }

    /// Build a memoryview over the module's bytecode, if it has any.
    fn get_bytecode_memory_view(&self, py: Python) -> Option<PyObject> {
        self.bytecode.and_then(|blob| get_memory_view(py, blob))
    }
}
/// Index of every embedded Python module, keyed by module name.
///
/// The entries borrow from the raw backing blob; nothing is copied.
struct PythonModulesData {
    data: HashMap<&'static str, PythonModuleData>,
}

impl PythonModulesData {
    /// Parse the packed modules blob into a name -> module data map.
    ///
    /// Layout, as read here: a little-endian `u32` module count, then one
    /// `(name length, source length, bytecode length)` triple of `u32`s per
    /// module, then all names, then all sources, then all bytecodes, each
    /// group in index order. A length of zero means "not present" for
    /// source and bytecode.
    ///
    /// Returns an error string when the index cannot be read; slicing past
    /// the end of the blob panics.
    fn from(data: &'static [u8]) -> Result<PythonModulesData, &'static str> {
        let mut reader = Cursor::new(data);

        let count = reader
            .read_u32::<LittleEndian>()
            .map_err(|_| "failed reading count")? as usize;

        let mut index = Vec::with_capacity(count);
        let mut names_total = 0;
        let mut sources_total = 0;

        for _ in 0..count {
            let name_length = reader
                .read_u32::<LittleEndian>()
                .map_err(|_| "failed reading name length")? as usize;
            let source_length = reader
                .read_u32::<LittleEndian>()
                .map_err(|_| "failed reading source length")? as usize;
            let bytecode_length = reader
                .read_u32::<LittleEndian>()
                .map_err(|_| "failed reading bytecode length")? as usize;

            names_total += name_length;
            sources_total += source_length;
            index.push((name_length, source_length, bytecode_length));
        }

        let mut modules = HashMap::with_capacity(count);

        // Three running offsets, one per section of the blob.
        let mut name_at = reader.position() as usize;
        let mut source_at = name_at + names_total;
        let mut bytecode_at = source_at + sources_total;

        for (name_length, source_length, bytecode_length) in index {
            let name =
                unsafe { std::str::from_utf8_unchecked(&data[name_at..name_at + name_length]) };

            let source = match source_length {
                0 => None,
                n => Some(&data[source_at..source_at + n]),
            };

            let bytecode = match bytecode_length {
                0 => None,
                n => Some(&data[bytecode_at..bytecode_at + n]),
            };

            name_at += name_length;
            source_at += source_length;
            bytecode_at += bytecode_length;

            modules.insert(name, PythonModuleData { source, bytecode });
        }

        Ok(PythonModulesData { data: modules })
    }
}
/// Represents Python resources data in memory.
///
/// This is essentially an index over a raw backing blob: package name ->
/// (resource name -> resource bytes). All names and byte slices borrow from
/// the blob.
struct PythonResourcesData {
    packages: HashMap<&'static str, Arc<Box<HashMap<&'static str, &'static [u8]>>>>,
}

impl PythonResourcesData {
    /// Read a little-endian `u32` at `*pos` and advance `*pos` past it.
    ///
    /// Returns `None` when fewer than four bytes remain.
    fn read_len(data: &[u8], pos: &mut usize) -> Option<usize> {
        let end = pos.checked_add(4)?;
        let raw = data.get(*pos..end)?;
        *pos = end;
        Some(u32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]]) as usize)
    }

    /// Borrow `len` bytes starting at `*pos` and advance `*pos` past them.
    ///
    /// Returns `None` when the requested range runs past the end of `data`.
    fn take_bytes(data: &'static [u8], pos: &mut usize, len: usize) -> Option<&'static [u8]> {
        let end = pos.checked_add(len)?;
        let bytes = data.get(*pos..end)?;
        *pos = end;
        Some(bytes)
    }

    /// Borrow a UTF-8 name of `len` bytes at `*pos` and advance `*pos`.
    fn take_name(
        data: &'static [u8],
        pos: &mut usize,
        len: usize,
    ) -> Result<&'static str, &'static str> {
        let bytes = Self::take_bytes(data, pos, len).ok_or("failed reading resource names")?;
        std::str::from_utf8(bytes).or(Err("resource name is not valid UTF-8"))
    }

    /// Parse the packed resources blob.
    ///
    /// Layout, as read here (all integers are little-endian `u32`):
    ///
    /// 1. package count;
    /// 2. per package: package name length, resource count, then per
    ///    resource: resource name length, resource data length;
    /// 3. all names, in index order (package name followed by its resource
    ///    names);
    /// 4. all resource data, in index order.
    ///
    /// Each resource is given its own slice of the data section: the data
    /// offset advances by the resource's length after every entry.
    ///
    /// # Errors
    ///
    /// Returns a static message when the index, the names or the data are
    /// truncated, or when a name is not valid UTF-8.
    fn from(data: &'static [u8]) -> Result<PythonResourcesData, &'static str> {
        let mut cursor = 0;

        let package_count =
            Self::read_len(data, &mut cursor).ok_or("failed reading package count")?;

        // Each index entry occupies at least 8 bytes, so a count larger than
        // `data.len() / 8` cannot be satisfied; clamp the reservation so a
        // corrupt count cannot trigger a huge allocation.
        let max_entries = data.len() / 8;

        let mut index = Vec::with_capacity(package_count.min(max_entries));
        let mut total_names_length = 0;

        for _ in 0..package_count {
            let package_name_length =
                Self::read_len(data, &mut cursor).ok_or("failed reading package name length")?;
            let resource_count =
                Self::read_len(data, &mut cursor).ok_or("failed reading resource count")?;

            total_names_length += package_name_length;

            let mut package_index = Vec::with_capacity(resource_count.min(max_entries));

            for _ in 0..resource_count {
                let resource_name_length = Self::read_len(data, &mut cursor)
                    .ok_or("failed reading resource name length")?;
                let resource_data_length = Self::read_len(data, &mut cursor)
                    .ok_or("failed reading resource data length")?;

                total_names_length += resource_name_length;
                package_index.push((resource_name_length, resource_data_length));
            }

            index.push((package_name_length, package_index));
        }

        // Names start right after the index; data starts right after names.
        let mut name_offset = cursor;
        let mut data_offset = name_offset + total_names_length;

        let mut res = HashMap::with_capacity(index.len());

        for (package_name_length, package_index) in index {
            let package_name = Self::take_name(data, &mut name_offset, package_name_length)?;

            let mut package_data = Box::new(HashMap::with_capacity(package_index.len()));

            for (resource_name_length, resource_data_length) in package_index {
                let resource_name =
                    Self::take_name(data, &mut name_offset, resource_name_length)?;

                // `take_bytes` advances `data_offset`, so consecutive
                // resources get consecutive, non-overlapping slices.
                let resource_data =
                    Self::take_bytes(data, &mut data_offset, resource_data_length)
                        .ok_or("failed reading resource data")?;

                package_data.insert(resource_name, resource_data);
            }

            res.insert(package_name, Arc::new(package_data));
        }

        Ok(PythonResourcesData { packages: res })
    }
}
#[allow(unused_doc_comments)]
/// Python type to import modules.
///
/// This type implements the importlib.abc.MetaPathFinder interface for
/// finding/loading modules. It supports loading various flavors of modules,
/// allowing it to be the only registered sys.meta_path importer.
///
/// Every lookup goes through `known_modules`, which maps a module name to
/// the flavor (built-in, frozen, in-memory) that decides how it is handled.
/// Names that are not in the table make each method return `None`.
py_class!(class PyOxidizerFinder |py| {
// Python objects and lookup tables handed over by `module_setup()`.
data imp_module: PyModule;
data marshal_loads: PyObject;
data builtin_importer: PyObject;
data frozen_importer: PyObject;
data call_with_frames_removed: PyObject;
data module_spec_type: PyObject;
data decode_source: PyObject;
data exec_fn: PyObject;
// Names of all packages (parents of known in-memory modules).
data packages: HashSet<&'static str>;
data known_modules: KnownModules;
// Package name -> (resource name -> resource bytes).
data resources: HashMap<&'static str, Arc<Box<HashMap<&'static str, &'static [u8]>>>>;
// Cache of resource reader instances, keyed by package name.
data resource_readers: RefCell<Box<HashMap<String, PyObject>>>;
// Start of importlib.abc.MetaPathFinder interface.
// Returns a module spec for `fullname`, or None when the name is unknown.
def find_spec(&self, fullname: &PyString, path: &PyObject, target: Option<PyObject> = None) -> PyResult<PyObject> {
let key = fullname.to_string(py)?;
if let Some(flavor) = self.known_modules(py).get(&*key) {
match flavor {
KnownModuleFlavor::Builtin => {
// BuiltinImporter.find_spec() always returns None if `path` is defined.
// And it doesn't use `target`. So don't proxy these values.
self.builtin_importer(py).call_method(py, "find_spec", (fullname,), None)
}
KnownModuleFlavor::Frozen => {
self.frozen_importer(py).call_method(py, "find_spec", (fullname, path, target), None)
}
KnownModuleFlavor::InMemory { .. } => {
let is_package = self.packages(py).contains(&*key);
// TODO consider setting origin and has_location so __file__ will be
// populated.
// Build ModuleSpec(fullname, self, is_package=...) so this object is
// also used as the loader.
let kwargs = PyDict::new(py);
kwargs.set_item(py, "is_package", is_package)?;
self.module_spec_type(py).call(py, (fullname, self), Some(&kwargs))
}
}
} else {
Ok(py.None())
}
}
def find_module(&self, _fullname: &PyObject, _path: &PyObject) -> PyResult<PyObject> {
// Method is deprecated. Always returns None.
// We /could/ call find_spec(). Meh.
Ok(py.None())
}
// Nothing is cached by the finder itself, so this is a no-op.
def invalidate_caches(&self) -> PyResult<PyObject> {
Ok(py.None())
}
// End of importlib.abc.MetaPathFinder interface.
// Start of importlib.abc.Loader interface.
// Always returns None.
def create_module(&self, _spec: &PyObject) -> PyResult<PyObject> {
Ok(py.None())
}
// Executes `module`: built-in and frozen modules are delegated to their
// importers; in-memory modules have their bytecode unmarshalled and run
// through exec() in the module's __dict__.
def exec_module(&self, module: &PyObject) -> PyResult<PyObject> {
let name = module.getattr(py, "__name__")?;
let key = name.extract::<String>(py)?;
if let Some(flavor) = self.known_modules(py).get(&*key) {
match flavor {
KnownModuleFlavor::Builtin => {
self.builtin_importer(py).call_method(py, "exec_module", (module,), None)
},
KnownModuleFlavor::Frozen => {
self.frozen_importer(py).call_method(py, "exec_module", (module,), None)
},
KnownModuleFlavor::InMemory { module_data } => {
match module_data.get_bytecode_memory_view(py) {
Some(value) => {
let code = self.marshal_loads(py).call(py, (value,), None)?;
let exec_fn = self.exec_fn(py);
let dict = module.getattr(py, "__dict__")?;
self.call_with_frames_removed(py).call(py, (exec_fn, code, dict), None)
},
None => {
// No bytecode memoryview could be obtained for this module.
Err(PyErr::new::<ImportError, _>(py, ("cannot find code in memory", name)))
}
}
},
}
} else {
// Raising here might make more sense, as exec_module() shouldn't
// be called on the Loader that didn't create the module.
Ok(py.None())
}
}
// End of importlib.abc.Loader interface.
// Start of importlib.abc.InspectLoader interface.
// Returns the code object for frozen and in-memory modules; None for
// built-in and unknown modules.
def get_code(&self, fullname: &PyString) -> PyResult<PyObject> {
let key = fullname.to_string(py)?;
if let Some(flavor) = self.known_modules(py).get(&*key) {
match flavor {
KnownModuleFlavor::Frozen => {
let imp_module = self.imp_module(py);
imp_module.call(py, "get_frozen_object", (fullname,), None)
},
KnownModuleFlavor::InMemory { module_data } => {
match module_data.get_bytecode_memory_view(py) {
Some(value) => {
self.marshal_loads(py).call(py, (value,), None)
}
None => {
Err(PyErr::new::<ImportError, _>(py, ("cannot find code in memory", fullname)))
}
}
},
KnownModuleFlavor::Builtin => {
Ok(py.None())
}
}
} else {
Ok(py.None())
}
}
// Returns decoded source text for in-memory modules; None for every other
// flavor and for unknown names. Raises ImportError when an in-memory
// module has no source memoryview.
def get_source(&self, fullname: &PyString) -> PyResult<PyObject> {
let key = fullname.to_string(py)?;
if let Some(flavor) = self.known_modules(py).get(&*key) {
if let KnownModuleFlavor::InMemory { module_data } = flavor {
match module_data.get_source_memory_view(py) {
Some(value) => {
self.decode_source(py).call(py, (value,), None)
},
None => {
Err(PyErr::new::<ImportError, _>(py, ("source not available", fullname)))
}
}
} else {
Ok(py.None())
}
} else {
Ok(py.None())
}
}
// End of importlib.abc.InspectLoader interface.
// Support obtaining ResourceReader instances.
// Returns a (cached) PyOxidizerResourceReader for packages and None for
// anything that is not a known package.
def get_resource_loader(&self, fullname: &PyString) -> PyResult<PyObject> {
let key = fullname.to_string(py)?;
// This should not happen since code below should not be recursive into this
// function.
let mut resource_readers = match self.resource_readers(py).try_borrow_mut() {
Ok(v) => v,
Err(_) => {
return Err(PyErr::new::<RuntimeError, _>(py, "resource reader already borrowed"));
}
};
// Return an existing instance if we have one.
if let Some(reader) = resource_readers.get(&*key) {
return Ok(reader.clone_ref(py));
}
// Only create a reader if the name is a package.
if self.packages(py).contains(&*key) {
// Not all packages have known resources.
let resources = match self.resources(py).get(&*key) {
Some(v) => v.clone(),
None => {
// Fall back to an empty resource table for this package.
let h: Box<HashMap<&'static str, &'static [u8]>> = Box::new(HashMap::new());
Arc::new(h)
}
};
let reader = PyOxidizerResourceReader::create_instance(py, resources)?.into_object();
// Remember the reader so later calls return the same instance.
resource_readers.insert(key.to_string(), reader.clone_ref(py));
Ok(reader)
} else {
Ok(py.None())
}
}
});
#[allow(unused_doc_comments)]
/// Implements in-memory reading of resource data.
///
/// Implements importlib.abc.ResourceReader.
///
/// Each instance serves the resources of a single table, mapping resource
/// name to the borrowed bytes of that resource.
py_class!(class PyOxidizerResourceReader |py| {
// Resource name -> resource bytes for the table this reader serves.
data resources: Arc<Box<HashMap<&'static str, &'static [u8]>>>;
/// Returns an opened, file-like object for binary reading of the resource.
///
/// If the resource cannot be found, FileNotFoundError is raised.
def open_resource(&self, resource: &PyString) -> PyResult<PyObject> {
let key = resource.to_string(py)?;
if let Some(data) = self.resources(py).get(&*key) {
match get_memory_view(py, data) {
Some(mv) => {
// Hand the memoryview to io.BytesIO to get a file-like object.
let io_module = py.import("io")?;
let bytes_io = io_module.get(py, "BytesIO")?;
bytes_io.call(py, (mv,), None)
}
// No memoryview could be created: propagate the pending Python error.
None => Err(PyErr::fetch(py))
}
} else {
Err(PyErr::new::<FileNotFoundError, _>(py, "resource not found"))
}
}
/// Returns the file system path to the resource.
///
/// If the resource does not concretely exist on the file system, raise
/// FileNotFoundError.
///
/// This implementation always raises, since resources are held in memory.
def resource_path(&self, _resource: &PyString) -> PyResult<PyObject> {
Err(PyErr::new::<FileNotFoundError, _>(py, "in-memory resources do not have filesystem paths"))
}
/// Returns True if the named name is considered a resource. FileNotFoundError
/// is raised if name does not exist.
def is_resource(&self, name: &PyString) -> PyResult<PyObject> {
let key = name.to_string(py)?;
if self.resources(py).contains_key(&*key) {
Ok(py.True().as_object().clone_ref(py))
} else {
Err(PyErr::new::<FileNotFoundError, _>(py, "resource not found"))
}
}
/// Returns an iterable of strings over the contents of the package.
///
/// Do note that it is not required that all names returned by the iterator be actual resources,
/// e.g. it is acceptable to return names for which is_resource() would be false.
///
/// Allowing non-resource names to be returned is to allow for situations where how a package
/// and its resources are stored are known a priori and the non-resource names would be useful.
/// For instance, returning subdirectory names is allowed so that when it is known that the
/// package and resources are stored on the file system then those subdirectory names can be
/// used directly.
///
/// This implementation returns the keys of the resource table.
def contents(&self) -> PyResult<PyObject> {
let resources = self.resources(py);
let mut names = Vec::with_capacity(resources.len());
for name in resources.keys() {
names.push(name.to_py_object(py));
}
let names_list = names.to_py_object(py);
Ok(names_list.as_object().clone_ref(py))
}
});
/// Record every ancestor package of a dotted module name.
///
/// For `"a.b.c"` this adds `"a.b"` and `"a"` to `packages`; a name without
/// a dot adds nothing. The full name itself is never inserted.
fn populate_packages(packages: &mut HashSet<&'static str>, name: &'static str) {
    // Each dot marks the end of one ancestor; everything before it is that
    // ancestor's full name.
    packages.extend(name.rmatch_indices('.').map(|(dot, _)| &name[..dot]));
}
// NUL-terminated so it can be handed to C as `MODULE_DEF.m_doc`
// (see `PyInit__pyoxidizer_importer`).
const DOC: &[u8] = b"Binary representation of Python modules\0";
/// Represents global module state to be passed at interpreter initialization time.
///
/// `module_init()` reads these values through `NEXT_MODULE_STATE` and copies
/// them into the per-module `ModuleState`.
#[derive(Debug)]
pub struct InitModuleState {
/// Whether to register the filesystem importer on sys.meta_path.
pub register_filesystem_importer: bool,
/// Values to set on sys.path.
pub sys_paths: Vec<String>,
/// Raw data constituting Python module source code.
pub py_modules_data: &'static [u8],
/// Raw data constituting Python resources data.
pub py_resources_data: &'static [u8],
}
/// Holds reference to next module state struct.
///
/// This module state will be copied into the module's state when the
/// Python module is initialized.
///
/// Starts out null. `module_init()` dereferences it, so it has to point at
/// a live `InitModuleState` before the importer module is created.
pub static mut NEXT_MODULE_STATE: *const InitModuleState = std::ptr::null();
/// Represents which importer to use for known modules.
#[derive(Debug)]
enum KnownModuleFlavor {
/// Handled by delegating to the built-in importer.
Builtin,
/// Handled by delegating to the frozen importer.
Frozen,
/// Served from the embedded modules blob; carries the module's data.
InMemory { module_data: PythonModuleData },
}
/// Lookup table from module name to the flavor that handles it.
type KnownModules = HashMap<&'static str, KnownModuleFlavor>;
/// State associated with each importer module instance.
///
/// We write per-module state to per-module instances of this struct so
/// we don't rely on global variables and so multiple importer modules can
/// exist without issue.
///
/// The storage is owned by the Python module object: `MODULE_DEF.m_size` is
/// the size of this struct and `get_module_state()` retrieves it via
/// `PyModule_GetState()`.
#[derive(Debug)]
struct ModuleState {
/// Whether to register PathFinder on sys.meta_path.
register_filesystem_importer: bool,
/// Values to set on sys.path.
sys_paths: Vec<String>,
/// Raw data constituting Python module source code.
py_modules_data: &'static [u8],
/// Raw data constituting Python resources data.
py_resources_data: &'static [u8],
/// Whether setup() has been called.
setup_called: bool,
}
/// Fetch the `ModuleState` stored inside an importer module object.
///
/// Returns a Python `ValueError` when `PyModule_GetState()` yields a null
/// pointer.
///
/// No check is made that `m` really is our importer module, so the cast to
/// `ModuleState` is only meaningful for modules created from `MODULE_DEF`.
fn get_module_state<'a>(py: Python, m: &'a PyModule) -> Result<&'a mut ModuleState, PyErr> {
    let raw = unsafe { pyffi::PyModule_GetState(m.as_object().as_ptr()) } as *mut ModuleState;

    match unsafe { raw.as_mut() } {
        Some(state) => Ok(state),
        None => Err(PyErr::new::<ValueError, _>(
            py,
            "unable to retrieve module state",
        )),
    }
}
/// Initialize the Python module object.
///
/// This is called as part of the PyInit_* function to create the internal
/// module object for the interpreter.
///
/// This receives a handle to the current Python interpreter and just-created
/// Python module instance. It populates the internal module state and registers
/// a _setup() on the module object for usage by Python.
///
/// Because this function accesses NEXT_MODULE_STATE, it should only be
/// called during interpreter initialization.
fn module_init(py: Python, m: &PyModule) -> PyResult<()> {
let mut state = get_module_state(py, m)?;
unsafe {
state.register_filesystem_importer = (*NEXT_MODULE_STATE).register_filesystem_importer;
// TODO we could move the value if we wanted to avoid the clone().
state.sys_paths = (*NEXT_MODULE_STATE).sys_paths.clone();
state.py_modules_data = (*NEXT_MODULE_STATE).py_modules_data;
state.py_resources_data = (*NEXT_MODULE_STATE).py_resources_data;
}
state.setup_called = false;
m.add(
py,
"_setup",
py_fn!(
py,
module_setup(
m: PyModule,
bootstrap_module: PyModule,
marshal_module: PyModule,
decode_source: PyObject
)
),
)?;
Ok(())
}
/// Called after module import/initialization to configure the importing mechanism.
///
/// This does the heavy work of configuring the importing mechanism.
///
/// This function should only be called once as part of
/// _frozen_importlib_external._install_external_importers().
///
/// Steps, in order:
///
/// 1. Refuse to run twice (tracked by `ModuleState::setup_called`).
/// 2. Take the two importers currently on `sys.meta_path`.
/// 3. Build the known-modules table from `PyImport_Inittab`,
///    `PyImport_FrozenModules` and the embedded modules blob, and derive the
///    set of package names.
/// 4. Parse the embedded resources blob.
/// 5. Create a `PyOxidizerFinder` and make it the only `sys.meta_path` entry.
/// 6. Optionally register the filesystem importer (`FileFinder` path hook
///    plus `PathFinder`).
/// 7. Replace `sys.path` with the configured paths.
///
/// Returns `None` on success. Raises `RuntimeError` on a repeated call and
/// `ValueError` when the interpreter state or the embedded data is not as
/// expected.
fn module_setup(
py: Python,
m: PyModule,
bootstrap_module: PyModule,
marshal_module: PyModule,
decode_source: PyObject,
) -> PyResult<PyObject> {
let state = get_module_state(py, &m)?;
if state.setup_called {
return Err(PyErr::new::<RuntimeError, _>(
py,
"PyOxidizer _setup() already called",
));
}
state.setup_called = true;
let imp_module = bootstrap_module.get(py, "_imp")?;
let imp_module = imp_module.cast_into::<PyModule>(py)?;
let sys_module = bootstrap_module.get(py, "sys")?;
let sys_module = sys_module.cast_as::<PyModule>(py)?;
let meta_path_object = sys_module.get(py, "meta_path")?;
// We should be executing as part of
// _frozen_importlib_external._install_external_importers().
// _frozen_importlib._install() should have already been called and set up
// sys.meta_path with [BuiltinImporter, FrozenImporter]. Those should be the
// only meta path importers present.
let meta_path = meta_path_object.cast_as::<PyList>(py)?;
if meta_path.len(py) != 2 {
return Err(PyErr::new::<ValueError, _>(
py,
"sys.meta_path does not contain 2 values",
));
}
let builtin_importer = meta_path.get_item(py, 0);
let frozen_importer = meta_path.get_item(py, 1);
// It may seem inefficient to create a full HashMap of the parsed data instead of e.g.
// streaming it. But the overhead of iterators was measured to be more than building
// up a temporary HashMap.
let modules_data = match PythonModulesData::from(state.py_modules_data) {
Ok(v) => v,
Err(msg) => return Err(PyErr::new::<ValueError, _>(py, msg)),
};
// Populate our known module lookup table with entries from builtins, frozens, and
// finally us. Last write wins and has the same effect as registering our
// meta path importer first. This should be safe. If nothing else, it allows
// some builtins to be overwritten by .py implemented modules.
let mut known_modules = KnownModules::with_capacity(modules_data.data.len() + 10);
// Walk the built-in module table until the entry with a null name.
for i in 0.. {
let record = unsafe { pyffi::PyImport_Inittab.offset(i) };
if unsafe { *record }.name.is_null() {
break;
}
let name = unsafe { CStr::from_ptr((*record).name as _) };
let name_str = match name.to_str() {
Ok(v) => v,
Err(_) => {
return Err(PyErr::new::<ValueError, _>(
py,
"unable to parse PyImport_Inittab",
));
}
};
known_modules.insert(name_str, KnownModuleFlavor::Builtin);
}
// Same walk over the frozen module table.
for i in 0.. {
let record = unsafe { pyffi::PyImport_FrozenModules.offset(i) };
if unsafe { *record }.name.is_null() {
break;
}
let name = unsafe { CStr::from_ptr((*record).name as _) };
let name_str = match name.to_str() {
Ok(v) => v,
Err(_) => {
return Err(PyErr::new::<ValueError, _>(
py,
"unable to parse PyImport_FrozenModules",
));
}
};
known_modules.insert(name_str, KnownModuleFlavor::Frozen);
}
// TODO consider baking set of packages into embedded data.
let mut packages: HashSet<&'static str> = HashSet::with_capacity(modules_data.data.len());
// In-memory modules are inserted last, so they replace a built-in or
// frozen entry of the same name.
for (name, record) in modules_data.data {
known_modules.insert(
name,
KnownModuleFlavor::InMemory {
module_data: record,
},
);
populate_packages(&mut packages, name);
}
let resources_data = match PythonResourcesData::from(state.py_resources_data) {
Ok(v) => v,
Err(msg) => return Err(PyErr::new::<ValueError, _>(py, msg)),
};
// Python callables and types the finder needs at import time.
let marshal_loads = marshal_module.get(py, "loads")?;
let call_with_frames_removed = bootstrap_module.get(py, "_call_with_frames_removed")?;
let module_spec_type = bootstrap_module.get(py, "ModuleSpec")?;
let builtins_module =
match unsafe { PyObject::from_borrowed_ptr_opt(py, pyffi::PyEval_GetBuiltins()) } {
Some(o) => o.cast_into::<PyDict>(py),
None => {
return Err(PyErr::new::<ValueError, _>(
py,
"unable to obtain __builtins__",
));
}
}?;
let exec_fn = match builtins_module.get_item(py, "exec") {
Some(v) => v,
None => {
return Err(PyErr::new::<ValueError, _>(
py,
"could not obtain __builtins__.exec",
));
}
};
let resource_readers: RefCell<Box<HashMap<String, PyObject>>> =
RefCell::new(Box::new(HashMap::new()));
let unified_importer = PyOxidizerFinder::create_instance(
py,
imp_module,
marshal_loads,
builtin_importer,
frozen_importer,
call_with_frames_removed,
module_spec_type,
decode_source,
exec_fn,
packages,
known_modules,
resources_data.packages,
resource_readers,
)?;
// Replace the existing meta path entries with the unified importer.
meta_path_object.call_method(py, "clear", NoArgs, None)?;
meta_path_object.call_method(py, "append", (unified_importer,), None)?;
// At this point the importing mechanism is fully initialized to use our
// unified importer, which handles built-in, frozen, and in-memory imports.
// Because we're probably running during Py_Initialize() and stdlib modules
// may not be in-memory, we need to register and configure additional importers
// here, before continuing with Py_Initialize(), otherwise we may not find
// the standard library!
if state.register_filesystem_importer {
// This is what importlib._bootstrap_external usually does:
// supported_loaders = _get_supported_file_loaders()
// sys.path_hooks.extend([FileFinder.path_hook(*supported_loaders)])
// sys.meta_path.append(PathFinder)
let frozen_importlib_external = py.import("_frozen_importlib_external")?;
let loaders =
frozen_importlib_external.call(py, "_get_supported_file_loaders", NoArgs, None)?;
let loaders_list = loaders.cast_as::<PyList>(py)?;
let loaders_vec: Vec<PyObject> = loaders_list.iter(py).collect();
// The loaders are passed as positional arguments to path_hook().
let loaders_tuple = PyTuple::new(py, loaders_vec.as_slice());
let file_finder = frozen_importlib_external.get(py, "FileFinder")?;
let path_hook = file_finder.call_method(py, "path_hook", loaders_tuple, None)?;
let path_hooks = sys_module.get(py, "path_hooks")?;
path_hooks.call_method(py, "append", (path_hook,), None)?;
let path_finder = frozen_importlib_external.get(py, "PathFinder")?;
let meta_path = sys_module.get(py, "meta_path")?;
meta_path.call_method(py, "append", (path_finder,), None)?;
}
// Ideally we should be calling Py_SetPath() before Py_Initialize() to set sys.path.
// But we tried to do this and only ran into problems due to string conversions,
// unwanted side-effects. Updating sys.path directly before it is used by PathFinder
// (which was just registered above) should have the same effect.
// Always clear out sys.path.
let sys_path = sys_module.get(py, "path")?;
sys_path.call_method(py, "clear", NoArgs, None)?;
// And repopulate it with entries from the config.
for path in &state.sys_paths {
let py_path = PyString::new(py, path.as_str());
sys_path.call_method(py, "append", (py_path,), None)?;
}
Ok(py.None())
}
/// Module definition handed to `PyModule_Create()`.
///
/// `m_name` and `m_doc` start out null and are filled in on the first call
/// to `PyInit__pyoxidizer_importer()`. `m_size` reserves room for one
/// `ModuleState` per module instance.
static mut MODULE_DEF: pyffi::PyModuleDef = pyffi::PyModuleDef {
m_base: pyffi::PyModuleDef_HEAD_INIT,
m_name: std::ptr::null(),
m_doc: std::ptr::null(),
m_size: std::mem::size_of::<ModuleState>() as isize,
// No method table and no slots.
m_methods: 0 as *mut _,
m_slots: 0 as *mut _,
m_traverse: None,
m_clear: None,
m_free: None,
};
/// Entry point that builds the importer module object.
///
/// The `cpython` crate's module macros are deliberately not used: they are
/// somewhat opinionated about how things should work, e.g. they call
/// PyEval_InitThreads(), which is undesired here. We want total control.
///
/// Returns the new module, or a null pointer when creating or initializing
/// it fails; for failures reported as Rust errors the Python error
/// indicator is set before returning.
#[allow(non_snake_case)]
pub extern "C" fn PyInit__pyoxidizer_importer() -> *mut pyffi::PyObject {
    let py = unsafe { cpython::Python::assume_gil_acquired() };

    // TRACKING RUST1.32 We can't call as_ptr() in const fn in Rust 1.31.
    unsafe {
        if MODULE_DEF.m_name.is_null() {
            MODULE_DEF.m_name = PYOXIDIZER_IMPORTER_NAME.as_ptr() as *const _;
            MODULE_DEF.m_doc = DOC.as_ptr() as *const _;
        }
    }

    let raw_module = unsafe { pyffi::PyModule_Create(&mut MODULE_DEF) };
    if raw_module.is_null() {
        return raw_module;
    }

    let owned = unsafe { PyObject::from_owned_ptr(py, raw_module) };
    let module = match owned.cast_into::<PyModule>(py) {
        Ok(m) => m,
        Err(e) => {
            PyErr::from(e).restore(py);
            return std::ptr::null_mut();
        }
    };

    if let Err(e) = module_init(py, &module) {
        e.restore(py);
        return std::ptr::null_mut();
    }

    module.into_object().steal_ptr()
}

View file

@ -1,34 +0,0 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
/*!
Manage an embedded Python interpreter.
The `pyembed` crate contains functionality for managing a Python interpreter
embedded in the current binary. This crate is typically used along with
[PyOxidizer](https://github.com/indygreg/PyOxidizer) for producing
self-contained binaries containing Python.
The most important types are [`PythonConfig`](struct.PythonConfig.html) and
[`MainPythonInterpreter`](struct.MainPythonInterpreter.html). A `PythonConfig`
defines how a Python interpreter is to behave. A `MainPythonInterpreter`
creates and manages that interpreter and serves as a high-level interface for
running code in the interpreter.
*/
mod config;
mod data;
mod importer;
mod pyalloc;
mod pyinterp;
mod pystr;
#[allow(unused_imports)]
pub use crate::config::PythonConfig;
#[allow(unused_imports)]
pub use crate::data::default_python_config;
#[allow(unused_imports)]
pub use crate::pyinterp::MainPythonInterpreter;

View file

@ -1,221 +0,0 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
//! Custom Python memory allocators.
#[cfg(feature = "jemalloc-sys")]
use jemalloc_sys as jemallocffi;
use libc::{c_void, size_t};
use python3_sys as pyffi;
use std::alloc;
use std::collections::HashMap;
#[cfg(feature = "jemalloc-sys")]
use std::ptr::null_mut;
const MIN_ALIGN: usize = 16;
type RawAllocatorState = HashMap<*mut u8, alloc::Layout>;
/// Holds state for the raw memory allocator.
///
/// Ideally we wouldn't need to track state. But Rust's dealloc() API
/// requires passing in a Layout that matches the allocation. This means
/// we need to track the Layout for each allocation. This data structure
/// facilitates that.
///
/// TODO HashMap isn't thread safe and the Python raw allocator doesn't
/// hold the GIL. So we need a thread safe map or a mutex guarding access.
pub struct RawAllocator {
/// Allocator callbacks handed to Python. Its `ctx` field is set by
/// `make_raw_rust_memory_allocator()` to point at the map owned by `_state`.
pub allocator: pyffi::PyMemAllocatorEx,
/// Owns the pointer -> Layout map that `allocator.ctx` points at, so the
/// map is released when this struct is dropped.
_state: Box<RawAllocatorState>,
}
/// `malloc` callback of the Rust-backed raw allocator.
///
/// `ctx` must be the `RawAllocatorState` pointer stored in the allocator's
/// `ctx` field. On success the returned block is recorded in that map
/// together with its `Layout` so it can be released later.
///
/// Returns null when the request cannot be satisfied: either the size
/// cannot be described by a `Layout`, or the underlying allocator failed.
extern "C" fn raw_rust_malloc(ctx: *mut c_void, size: size_t) -> *mut c_void {
    // PyMem_RawMalloc()'s docs say: Requesting zero bytes returns a distinct
    // non-NULL pointer if possible, as if PyMem_RawMalloc(1) had been called
    // instead.
    let size = if size == 0 { 1 } else { size };

    // Use the checked constructor: an oversized request must fail with NULL
    // rather than build an invalid Layout (which is undefined behavior).
    let layout = match alloc::Layout::from_size_align(size, MIN_ALIGN) {
        Ok(layout) => layout,
        Err(_) => return std::ptr::null_mut(),
    };

    unsafe {
        let state = &mut *(ctx as *mut RawAllocatorState);
        let res = alloc::alloc(layout);

        // Only track real allocations; a null result must not end up as a
        // bogus entry in the map.
        if !res.is_null() {
            state.insert(res, layout);
        }

        res as *mut c_void
    }
}
/// `calloc` callback of the Rust-backed raw allocator.
///
/// Allocates zero-initialized storage for `nelem` elements of `elsize`
/// bytes each and records the block in the `RawAllocatorState` map that
/// `ctx` points at.
///
/// Returns null when `nelem * elsize` overflows, when the size cannot be
/// described by a `Layout`, or when the underlying allocator fails.
extern "C" fn raw_rust_calloc(ctx: *mut c_void, nelem: size_t, elsize: size_t) -> *mut c_void {
    // PyMem_RawCalloc()'s docs say: Requesting zero elements or elements of
    // size zero bytes returns a distinct non-NULL pointer if possible, as if
    // PyMem_RawCalloc(1, 1) had been called instead.
    //
    // The multiplication is checked: a wrapped product would hand the caller
    // a buffer far smaller than it asked for.
    let size = match nelem.checked_mul(elsize) {
        Some(0) => 1,
        Some(val) => val,
        None => return std::ptr::null_mut(),
    };

    let layout = match alloc::Layout::from_size_align(size, MIN_ALIGN) {
        Ok(layout) => layout,
        Err(_) => return std::ptr::null_mut(),
    };

    unsafe {
        let state = &mut *(ctx as *mut RawAllocatorState);
        let res = alloc::alloc_zeroed(layout);

        // Never track a failed (null) allocation.
        if !res.is_null() {
            state.insert(res, layout);
        }

        res as *mut c_void
    }
}
/// `realloc` callback of the Rust-backed raw allocator.
///
/// Resizes the block at `ptr` to `new_size` bytes and updates the
/// `RawAllocatorState` map that `ctx` points at. A null `ptr` behaves like
/// `raw_rust_malloc()`.
///
/// Returns null when the new size cannot be described by a `Layout` or the
/// underlying allocator fails. In that case the original block is left
/// untouched and stays tracked, so it can still be freed.
///
/// Panics if `ptr` is non-null but was not handed out by this allocator.
extern "C" fn raw_rust_realloc(
    ctx: *mut c_void,
    ptr: *mut c_void,
    new_size: size_t,
) -> *mut c_void {
    // PyMem_RawRealloc()'s docs say: If p is NULL, the call is equivalent to
    // PyMem_RawMalloc(n); else if n is equal to zero, the memory block is
    // resized but is not freed, and the returned pointer is non-NULL.
    if ptr.is_null() {
        return raw_rust_malloc(ctx, new_size);
    }

    let new_size = if new_size == 0 { 1 } else { new_size };

    // Validate the requested size up front instead of building an unchecked
    // (possibly invalid) Layout.
    let layout = match alloc::Layout::from_size_align(new_size, MIN_ALIGN) {
        Ok(layout) => layout,
        Err(_) => return std::ptr::null_mut(),
    };

    unsafe {
        let state = &mut *(ctx as *mut RawAllocatorState);
        let key = ptr as *mut u8;

        let old_layout = *state
            .get(&key)
            .expect("original memory address not tracked");

        let res = alloc::realloc(key, old_layout, new_size);

        // On failure the old block is still valid, so its record must stay.
        // Only swap the bookkeeping once the reallocation has succeeded.
        if !res.is_null() {
            state.remove(&key);
            state.insert(res, layout);
        }

        res as *mut c_void
    }
}
/// `free` callback of the Rust-backed raw allocator.
///
/// Releases the block at `ptr` using the `Layout` recorded for it in the
/// `RawAllocatorState` map that `ctx` points at, and drops that record.
/// A null `ptr` is a no-op.
///
/// Panics if `ptr` is non-null but has no record in the map.
extern "C" fn raw_rust_free(ctx: *mut c_void, ptr: *mut c_void) {
    if ptr.is_null() {
        return;
    }

    unsafe {
        let state = &mut *(ctx as *mut RawAllocatorState);
        let key = ptr as *mut u8;

        // remove() both fetches the layout and untracks the block in one
        // lookup. The panic message is only formatted on the failure path,
        // not on every free.
        let layout = state
            .remove(&key)
            .unwrap_or_else(|| panic!("could not find allocated memory record: {:?}", key));

        alloc::dealloc(key, layout);
    }
}
pub fn make_raw_rust_memory_allocator() -> RawAllocator {
// We need to allocate the HashMap on the heap so the pointer doesn't refer
// to the stack. We rebox and add the Box to our struct so lifetimes are
// managed.
let alloc = Box::new(HashMap::<*mut u8, alloc::Layout>::new());
let state = Box::into_raw(alloc);
let allocator = pyffi::PyMemAllocatorEx {
ctx: state as *mut c_void,
malloc: Some(raw_rust_malloc),
calloc: Some(raw_rust_calloc),
realloc: Some(raw_rust_realloc),
free: Some(raw_rust_free),
};
RawAllocator {
allocator,
_state: unsafe { Box::from_raw(state) },
}
}
// The functions below form a raw memory allocator that talks to jemalloc
// directly, avoiding the overhead of going through Rust's allocation layer.

/// `malloc` callback backed by jemalloc's `mallocx()`.
#[cfg(feature = "jemalloc-sys")]
extern "C" fn raw_jemalloc_malloc(_ctx: *mut c_void, size: size_t) -> *mut c_void {
    // PyMem_RawMalloc() documents that a zero-byte request should behave as
    // if one byte had been requested, yielding a distinct non-NULL pointer.
    let request = if size == 0 { 1 } else { size };

    unsafe { jemallocffi::mallocx(request, 0) }
}
/// `calloc` callback backed by jemalloc's `mallocx()` with `MALLOCX_ZERO`.
///
/// Returns null if `nelem * elsize` overflows; otherwise returns whatever
/// `mallocx()` returns for the zero-initialized request.
#[cfg(feature = "jemalloc-sys")]
extern "C" fn raw_jemalloc_calloc(_ctx: *mut c_void, nelem: size_t, elsize: size_t) -> *mut c_void {
    // PyMem_RawCalloc()'s docs say: Requesting zero elements or elements of
    // size zero bytes returns a distinct non-NULL pointer if possible, as if
    // PyMem_RawCalloc(1, 1) had been called instead.
    //
    // The product is checked so an overflowing request fails with NULL
    // instead of silently allocating a too-small buffer.
    let size = match nelem.checked_mul(elsize) {
        Some(0) => 1,
        Some(val) => val,
        None => return std::ptr::null_mut(),
    };

    unsafe { jemallocffi::mallocx(size, jemallocffi::MALLOCX_ZERO) }
}
/// `realloc` callback backed by jemalloc's `rallocx()`.
///
/// A null `ptr` is forwarded to `raw_jemalloc_malloc()`.
#[cfg(feature = "jemalloc-sys")]
extern "C" fn raw_jemalloc_realloc(
    ctx: *mut c_void,
    ptr: *mut c_void,
    new_size: size_t,
) -> *mut c_void {
    // Per PyMem_RawRealloc(): a NULL pointer makes this equivalent to
    // PyMem_RawMalloc(n); a zero size resizes (but does not free) the block
    // and still yields a non-NULL pointer.
    if ptr.is_null() {
        raw_jemalloc_malloc(ctx, new_size)
    } else {
        let request = if new_size == 0 { 1 } else { new_size };
        unsafe { jemallocffi::rallocx(ptr, request, 0) }
    }
}
/// `free` callback backed by jemalloc's `dallocx()`. Null is ignored.
#[cfg(feature = "jemalloc-sys")]
extern "C" fn raw_jemalloc_free(_ctx: *mut c_void, ptr: *mut c_void) {
    if !ptr.is_null() {
        unsafe { jemallocffi::dallocx(ptr, 0) }
    }
}
/// Build the allocator callback table that routes Python's raw allocations
/// to jemalloc. No context is needed, so `ctx` is null.
#[cfg(feature = "jemalloc-sys")]
pub fn make_raw_jemalloc_allocator() -> pyffi::PyMemAllocatorEx {
    let callbacks = pyffi::PyMemAllocatorEx {
        ctx: null_mut(),
        malloc: Some(raw_jemalloc_malloc),
        calloc: Some(raw_jemalloc_calloc),
        realloc: Some(raw_jemalloc_realloc),
        free: Some(raw_jemalloc_free),
    };

    callbacks
}

View file

@ -1,776 +0,0 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
//! Manage an embedded Python interpreter.
use libc::c_char;
use python3_sys as pyffi;
use std::collections::BTreeSet;
use std::env;
use std::ffi::CString;
use std::fs;
use std::io::Write;
use std::path::PathBuf;
use std::ptr::null;
use cpython::exc::ValueError;
use cpython::{
GILGuard, NoArgs, ObjectProtocol, PyClone, PyDict, PyErr, PyList, PyModule, PyObject, PyResult,
Python, PythonObject, ToPyObject,
};
use super::config::{PythonConfig, PythonRawAllocator, PythonRunMode};
use super::importer::PyInit__pyoxidizer_importer;
#[cfg(feature = "jemalloc-sys")]
use super::pyalloc::make_raw_jemalloc_allocator;
use super::pyalloc::{make_raw_rust_memory_allocator, RawAllocator};
use super::pystr::{osstring_to_bytes, osstring_to_str, OwnedPyStr};
pub const PYOXIDIZER_IMPORTER_NAME: &[u8] = b"_pyoxidizer_importer\0";
const FROZEN_IMPORTLIB_NAME: &[u8] = b"_frozen_importlib\0";
const FROZEN_IMPORTLIB_EXTERNAL_NAME: &[u8] = b"_frozen_importlib_external\0";
/// Represents the results of executing Python code with exception handling.
///
/// Produced by `MainPythonInterpreter::run_and_handle_error()` and mapped to
/// a process exit code by `MainPythonInterpreter::run_as_main()`.
#[derive(Debug)]
pub enum PythonRunResult {
/// Code executed without raising an exception.
Ok {},
/// Code executed and raised an exception.
Err {},
/// Code executed and raised SystemExit with the specified exit code.
Exit { code: i32 },
}
fn make_custom_frozen_modules(config: &PythonConfig) -> [pyffi::_frozen; 3] {
[
pyffi::_frozen {
name: FROZEN_IMPORTLIB_NAME.as_ptr() as *const i8,
code: config.frozen_importlib_data.as_ptr(),
size: config.frozen_importlib_data.len() as i32,
},
pyffi::_frozen {
name: FROZEN_IMPORTLIB_EXTERNAL_NAME.as_ptr() as *const i8,
code: config.frozen_importlib_external_data.as_ptr(),
size: config.frozen_importlib_external_data.len() as i32,
},
pyffi::_frozen {
name: null(),
code: null(),
size: 0,
},
]
}
#[cfg(windows)]
extern "C" {
// Internal Microsoft CRT function used by `stdin_to_file()` (index 0) and
// `stderr_to_file()` (index 2) to obtain the `FILE*` for a standard stream.
pub fn __acrt_iob_func(x: u32) -> *mut libc::FILE;
}
/// Obtain a C `FILE*` for standard input on Windows.
#[cfg(windows)]
fn stdin_to_file() -> *mut libc::FILE {
    // On Windows, <stdio.h> does not export a `stdin` symbol. Instead
    // corecrt_wstdio.h defines `stdin` as a macro that calls an internal CRT
    // function, so we mimic what the C code does.
    //
    // Depending on an internal CRT symbol is probably wrong, but Microsoft
    // tends to keep undocumented functions like this one backwards
    // compatible because they are used in the wild.
    //
    // Using fdopen(0), as on POSIX, was tried and crashes: the Microsoft C
    // Runtime already goes to great lengths to present native HANDLEs as
    // POSIX file descriptors. Other routes from a HANDLE to a FILE* may
    // exist (_open_osfhandle() + _fdopen() might work), but calling the very
    // function <stdio.h> itself uses looks like the least risky choice.
    const STDIN_INDEX: u32 = 0;

    unsafe { __acrt_iob_func(STDIN_INDEX) }
}
/// Obtain a C `FILE*` for standard input on Unix via `fdopen()`.
///
/// Returns whatever `fdopen()` returns, which is null on failure.
#[cfg(unix)]
fn stdin_to_file() -> *mut libc::FILE {
    // fdopen() expects a NUL-terminated mode *string*. Passing the address
    // of a lone `char` makes it read past that byte, so a proper C string
    // literal is used.
    unsafe { libc::fdopen(libc::STDIN_FILENO, b"r\0".as_ptr() as *const libc::c_char) }
}
/// Obtain a C `FILE*` for standard error on Windows, using the same internal
/// CRT accessor as `stdin_to_file()`.
#[cfg(windows)]
fn stderr_to_file() -> *mut libc::FILE {
    const STDERR_INDEX: u32 = 2;

    unsafe { __acrt_iob_func(STDERR_INDEX) }
}
/// Obtain a C `FILE*` for standard error on Unix via `fdopen()`.
///
/// Returns whatever `fdopen()` returns, which is null on failure.
#[cfg(unix)]
fn stderr_to_file() -> *mut libc::FILE {
    // fdopen() expects a NUL-terminated mode *string*. Passing the address
    // of a lone `char` makes it read past that byte, so a proper C string
    // literal is used.
    unsafe { libc::fdopen(libc::STDERR_FILENO, b"w\0".as_ptr() as *const libc::c_char) }
}
/// Return the jemalloc-backed raw allocator (builds with jemalloc support).
#[cfg(feature = "jemalloc-sys")]
fn raw_jemallocator() -> pyffi::PyMemAllocatorEx {
    let allocator = make_raw_jemalloc_allocator();
    allocator
}
/// Stand-in used when the crate is built without jemalloc support.
///
/// Always panics, because no jemalloc allocator can be provided.
#[cfg(not(feature = "jemalloc-sys"))]
fn raw_jemallocator() -> pyffi::PyMemAllocatorEx {
    panic!("jemalloc is not available in this build configuration")
}
/// Manages an embedded Python interpreter.
///
/// **Warning: Python interpreters have global state. There should only be a
/// single instance of this type per process.**
///
/// Instances must only be constructed through [`MainPythonInterpreter::new()`](#method.new).
///
/// This type and its various functionality is a glorified wrapper around the
/// Python C API. But there's a lot of added functionality on top of what the C
/// API provides.
///
/// Both the low-level `python3-sys` and higher-level `cpython` crates are used.
pub struct MainPythonInterpreter<'a> {
/// Configuration this interpreter was constructed from.
pub config: PythonConfig,
/// Custom frozen module table built from `config`; installed as
/// `PyImport_FrozenModules` when `config.use_custom_importlib` is set.
frozen_modules: [pyffi::_frozen; 3],
/// Set once `init()` has called `Py_Initialize()`.
init_run: bool,
/// jemalloc callback table, present when the config selects jemalloc.
raw_allocator: Option<pyffi::PyMemAllocatorEx>,
/// Rust-backed allocator and its tracking state, present when the config
/// selects the Rust allocator.
raw_rust_allocator: Option<RawAllocator>,
/// Guard held while the GIL was acquired through `acquire_gil()`.
gil: Option<GILGuard>,
/// Cached interpreter handle; `None` after `release_gil()`.
py: Option<Python<'a>>,
/// Keeps the buffer passed to `Py_SetProgramName()` alive.
program_name: Option<OwnedPyStr>,
}
impl<'a> MainPythonInterpreter<'a> {
/// Create an interpreter from `config` and initialize it right away.
///
/// Initializing the Python interpreter is a side-effect of this call, and
/// the GIL is held afterwards. Any error reported by `init()` is returned.
pub fn new(config: PythonConfig) -> Result<MainPythonInterpreter<'a>, &'static str> {
    // At most one of the two custom allocators is populated; the system
    // allocator needs neither.
    let mut raw_allocator = None;
    let mut raw_rust_allocator = None;
    match config.raw_allocator {
        PythonRawAllocator::Jemalloc => raw_allocator = Some(raw_jemallocator()),
        PythonRawAllocator::Rust => raw_rust_allocator = Some(make_raw_rust_memory_allocator()),
        PythonRawAllocator::System => {}
    }

    let frozen_modules = make_custom_frozen_modules(&config);

    let mut interpreter = MainPythonInterpreter {
        config,
        frozen_modules,
        init_run: false,
        raw_allocator,
        raw_rust_allocator,
        gil: None,
        py: None,
        program_name: None,
    };

    interpreter.init()?;

    Ok(interpreter)
}
/// Initialize the interpreter.
///
/// This mutates global state in the Python interpreter according to the
/// bound config and initializes the Python interpreter.
///
/// After this is called, the embedded Python interpreter is ready to
/// execute custom code.
///
/// If called more than once, the function is a no-op from the perspective
/// of interpreter initialization.
///
/// Returns a Python instance which has the GIL acquired.
fn init(&mut self) -> Result<Python, &'static str> {
if self.init_run {
return Ok(self.acquire_gil());
}
let config = &self.config;
let exe = env::current_exe().or_else(|_| Err("could not obtain current exe"))?;
let origin = exe
.parent()
.ok_or_else(|| "unable to get exe parent")?
.display()
.to_string();
let sys_paths: Vec<String> = config
.sys_paths
.iter()
.map(|path| path.replace("$ORIGIN", &origin))
.collect();
// TODO should we call PyMem::SetupDebugHooks() if enabled?
if let Some(raw_allocator) = &self.raw_allocator {
unsafe {
let ptr = raw_allocator as *const _;
pyffi::PyMem_SetAllocator(
pyffi::PyMemAllocatorDomain::PYMEM_DOMAIN_RAW,
ptr as *mut _,
);
}
} else if let Some(raw_rust_allocator) = &self.raw_rust_allocator {
unsafe {
let ptr = &raw_rust_allocator.allocator as *const _;
pyffi::PyMem_SetAllocator(
pyffi::PyMemAllocatorDomain::PYMEM_DOMAIN_RAW,
ptr as *mut _,
);
}
}
// Module state is a bit wonky.
//
// Our in-memory importer relies on a special module which holds references
// to Python objects exposing module/resource data. This module is imported as
// part of initializing the Python interpreter.
//
// This Python module object needs to hold references to the raw Python module
// and resource data. Those references are defined by the InitModuleState struct.
//
// Unfortunately, we can't easily associate state with the interpreter before
// calling Py_Initialize(). And the module initialization function receives no
// arguments. Our solution is to update a global pointer to point at "our" state
// then call Py_Initialize(). The module will be initialized as part of calling
// Py_Initialize(). It will copy the contents at the pointer into the local
// module state and the global pointer will be unused after that. The end result
// is that we have no reliance on global variables outside of a short window
// between now and when Py_Initialize() is called.
//
// We could potentially do away with this global variable by using a closure for
// the initialization function. But this rabbit hole may involve gross hackery
// like dynamic module names. It probably isn't worth it.
// It is important for references in this struct to have a lifetime of at least
// that of the interpreter.
// TODO specify lifetimes so the compiler validates this for us.
let module_state = super::importer::InitModuleState {
register_filesystem_importer: self.config.filesystem_importer,
sys_paths,
py_modules_data: config.py_modules_data,
py_resources_data: config.py_resources_data,
};
if config.use_custom_importlib {
// Replace the frozen modules in the interpreter with our custom set
// that knows how to import from memory.
unsafe {
pyffi::PyImport_FrozenModules = self.frozen_modules.as_ptr();
}
// Register our _pyoxidizer_importer extension which provides importing functionality.
unsafe {
// name char* needs to live as long as the interpreter is active.
pyffi::PyImport_AppendInittab(
PYOXIDIZER_IMPORTER_NAME.as_ptr() as *const i8,
Some(PyInit__pyoxidizer_importer),
);
// Move pointer to our stack allocated instance. This pointer will be
// accessed when creating the Python module object, which should be
// done automatically as part of low-level interpreter initialization
// when calling Py_Initialize() below.
super::importer::NEXT_MODULE_STATE = &module_state;
}
}
let home =
OwnedPyStr::from_str(exe.to_str().ok_or_else(|| "unable to convert exe to str")?)?;
unsafe {
// Pointer needs to live for lifetime of interpreter.
pyffi::Py_SetPythonHome(home.as_wchar_ptr());
}
let program_name = OwnedPyStr::from_str(config.program_name.as_str())?;
unsafe {
pyffi::Py_SetProgramName(program_name.as_wchar_ptr());
}
// Value needs to live for lifetime of interpreter.
self.program_name = Some(program_name);
// If we don't call Py_SetPath(), Python has its own logic for initializing it.
// We set it to an empty string because we don't want any paths by default. If
// we do have defined paths, they will be set after Py_Initialize().
unsafe {
// Value is copied internally. So short lifetime is OK.
let value = OwnedPyStr::from_str("")?;
pyffi::Py_SetPath(value.as_wchar_ptr());
}
if let (Some(ref encoding), Some(ref errors)) =
(&config.standard_io_encoding, &config.standard_io_errors)
{
let cencoding = CString::new(encoding.clone())
.or_else(|_| Err("unable to convert encoding to C string"))?;
let cerrors = CString::new(errors.clone())
.or_else(|_| Err("unable to convert encoding error mode to C string"))?;
let res = unsafe {
pyffi::Py_SetStandardStreamEncoding(
cencoding.as_ptr() as *const i8,
cerrors.as_ptr() as *const i8,
)
};
if res != 0 {
return Err("unable to set standard stream encoding");
}
}
unsafe {
pyffi::Py_DontWriteBytecodeFlag = if config.dont_write_bytecode { 1 } else { 0 };
pyffi::Py_IgnoreEnvironmentFlag = if config.ignore_python_env { 1 } else { 0 };
pyffi::Py_NoSiteFlag = if config.import_site { 0 } else { 1 };
pyffi::Py_NoUserSiteDirectory = if config.import_user_site { 0 } else { 1 };
pyffi::Py_OptimizeFlag = config.opt_level;
pyffi::Py_UnbufferedStdioFlag = if config.unbuffered_stdio { 1 } else { 0 };
}
/* Pre-initialization functions we could support:
*
* PyObject_SetArenaAllocator()
* PySys_AddWarnOption()
* PySys_AddXOption()
* PySys_ResetWarnOptions()
*/
unsafe {
pyffi::Py_Initialize();
}
// We shouldn't be accessing this pointer after Py_Initialize(). And the
// memory is stack allocated and doesn't outlive this frame. We don't want
// to leave a stack pointer sitting around!
unsafe {
super::importer::NEXT_MODULE_STATE = std::ptr::null();
}
let py = unsafe { Python::assume_gil_acquired() };
self.py = Some(py);
self.init_run = true;
// env::args() panics if arguments aren't valid Unicode. But invalid
// Unicode arguments are possible and some applications may want to
// support them.
//
// env::args_os() provides access to the raw OsString instances, which
// will be derived from wchar_t on Windows and char* on POSIX. We can
// convert these to Python str instances using a platform-specific
// mechanism.
let args_objs = env::args_os()
.map(|os_arg| osstring_to_str(py, os_arg))
.collect::<Result<Vec<PyObject>, &'static str>>()?;
// This will steal the pointer to the elements and mem::forget them.
let args = PyList::new(py, &args_objs);
let argv = b"argv\0";
let res = args.with_borrowed_ptr(py, |args_ptr| unsafe {
pyffi::PySys_SetObject(argv.as_ptr() as *const i8, args_ptr)
});
match res {
0 => (),
_ => return Err("unable to set sys.argv"),
}
if config.argvb {
let args_objs: Vec<PyObject> = env::args_os()
.map(|os_arg| osstring_to_bytes(py, os_arg))
.collect();
let args = PyList::new(py, &args_objs);
let argvb = b"argvb\0";
let res = args.with_borrowed_ptr(py, |args_ptr| unsafe {
pyffi::PySys_SetObject(argvb.as_ptr() as *const i8, args_ptr)
});
match res {
0 => (),
_ => return Err("unable to set sys.argvb"),
}
}
// As a convention, sys.oxidized is set to indicate we are running from
// a self-contained application.
let oxidized = b"oxidized\0";
let res = py.True().with_borrowed_ptr(py, |py_true| unsafe {
pyffi::PySys_SetObject(oxidized.as_ptr() as *const i8, py_true)
});
match res {
0 => (),
_ => return Err("unable to set sys.oxidized"),
}
Ok(py)
}
/// Make sure the GIL is not held by this interpreter handle.
///
/// When a cached Python handle exists, both it and the GIL guard are
/// cleared; otherwise nothing changes.
pub fn release_gil(&mut self) {
    if self.py.take().is_some() {
        self.gil = None;
    }
}
/// Make sure the GIL is held and hand back an interpreter handle.
///
/// A cached handle is returned as-is. Otherwise the GIL is acquired and
/// both the guard and the new handle are stored for later calls.
pub fn acquire_gil(&mut self) -> Python<'a> {
    if let Some(py) = self.py {
        return py;
    }

    let guard = GILGuard::acquire();
    let py = unsafe { Python::assume_gil_acquired() };

    self.gil = Some(guard);
    self.py = Some(py);

    py
}
/// Execute whatever the configuration designates as the default action.
///
/// The crate is built with settings that say what should run by default
/// (nothing, a REPL, a module, or a code string); this dispatches on that.
pub fn run(&mut self) -> PyResult<PyObject> {
    // Cloned so the config is not borrowed while `self` is used mutably below.
    let mode = self.config.run.clone();

    let py = self.acquire_gil();

    match mode {
        PythonRunMode::Module { module } => self.run_module_as_main(&module),
        PythonRunMode::Eval { code } => self.run_code(&code),
        PythonRunMode::Repl => self.run_repl(),
        PythonRunMode::None => Ok(py.None()),
    }
}
/// Handle a raised SystemExit exception.
///
/// This emulates the behavior in pythonrun.c:handle_system_exit() and
/// _Py_HandleSystemExit() but without the call to exit(), which we don't want.
fn handle_system_exit(&mut self, py: Python, err: PyErr) -> Result<i32, &'static str> {
std::io::stdout()
.flush()
.or_else(|_| Err("failed to flush stdout"))?;
let mut value = match err.pvalue {
Some(ref instance) => {
if instance.as_ptr() == py.None().as_ptr() {
return Ok(0);
}
instance.clone_ref(py)
}
None => {
return Ok(0);
}
};
if unsafe { pyffi::PyExceptionInstance_Check(value.as_ptr()) } != 0 {
// The error code should be in the "code" attribute.
if let Ok(code) = value.getattr(py, "code") {
if code == py.None() {
return Ok(0);
}
// Else pretend exc_value.code is the new exception value to use
// and fall through to below.
value = code;
}
}
if unsafe { pyffi::PyLong_Check(value.as_ptr()) } != 0 {
return Ok(unsafe { pyffi::PyLong_AsLong(value.as_ptr()) as i32 });
}
let sys_module = py
.import("sys")
.or_else(|_| Err("unable to obtain sys module"))?;
let stderr = sys_module.get(py, "stderr");
// This is a cargo cult from the canonical implementation.
unsafe { pyffi::PyErr_Clear() }
match stderr {
Ok(o) => unsafe {
pyffi::PyFile_WriteObject(value.as_ptr(), o.as_ptr(), pyffi::Py_PRINT_RAW);
},
Err(_) => {
unsafe {
pyffi::PyObject_Print(value.as_ptr(), stderr_to_file(), pyffi::Py_PRINT_RAW);
}
std::io::stderr()
.flush()
.or_else(|_| Err("failure to flush stderr"))?;
}
}
unsafe {
pyffi::PySys_WriteStderr(b"\n\0".as_ptr() as *const i8);
}
// This frees references to this exception, which may be necessary to avoid
// badness.
err.restore(py);
unsafe {
pyffi::PyErr_Clear();
}
Ok(1)
}
/// Run the default action and deal with any Python exception it raised.
///
/// SystemExit is turned into `PythonRunResult::Exit` with an exit code;
/// any other exception is printed and reported as `PythonRunResult::Err`.
pub fn run_and_handle_error(&mut self) -> PythonRunResult {
    // There are underdefined lifetime bugs at play here. The returned
    // PyObject has no explicit lifetime. Keeping the result in a local
    // avoids a situation where drop() on self runs before the PyObject's
    // drop(). That would be a problem because PyObject's drop() tries to
    // acquire the GIL, and with the interpreter shut down there is no GIL
    // to acquire, so we may segfault.
    // TODO look into setting lifetimes properly so the compiler can
    // prevent some issues.
    let outcome = self.run();
    let py = self.acquire_gil();

    let raised = match outcome {
        Ok(_) => return PythonRunResult::Ok {},
        Err(e) => e,
    };

    // PyErr_PrintEx() calls exit() when it sees SystemExit, so that case is
    // detected and handled manually to return an exit code instead.
    // TODO surely the cpython crate offers a better way to do this...
    raised.restore(py);
    let is_system_exit =
        unsafe { pyffi::PyErr_ExceptionMatches(pyffi::PyExc_SystemExit) } != 0;
    let raised = cpython::PyErr::fetch(py);

    if !is_system_exit {
        self.print_err(raised);
        return PythonRunResult::Err {};
    }

    let code = match self.handle_system_exit(py, raised) {
        Ok(code) => code,
        Err(msg) => {
            eprintln!("{}", msg);
            1
        }
    };

    PythonRunResult::Exit { code }
}
/// Run the default action and translate the outcome into an exit code.
///
/// Success maps to 0, an unhandled exception to 1, and SystemExit to the
/// code it carried.
pub fn run_as_main(&mut self) -> i32 {
    let outcome = self.run_and_handle_error();

    match outcome {
        PythonRunResult::Exit { code } => code,
        PythonRunResult::Err {} => 1,
        PythonRunResult::Ok {} => 0,
    }
}
/// Runs a Python module as the __main__ module.
///
/// Returns the execution result of the module code.
///
/// The interpreter is automatically initialized if needed.
///
/// # Errors
///
/// Returns the raised Python exception if `__main__` cannot be obtained,
/// the module spec or code cannot be resolved, or the module code raises.
/// The exception is handed back untouched (not printed here), so callers
/// such as `run_and_handle_error()` can inspect and report it. This matches
/// what `run_code()` does.
pub fn run_module_as_main(&mut self, name: &str) -> PyResult<PyObject> {
    let py = self.acquire_gil();

    // This is modeled after runpy.py:_run_module_as_main().
    let main_ptr =
        unsafe { pyffi::PyImport_AddModule("__main__\0".as_ptr() as *const c_char) };
    // A failed lookup must surface as an error instead of wrapping NULL.
    if main_ptr.is_null() {
        return Err(PyErr::fetch(py));
    }
    let main: PyModule = unsafe { PyObject::from_borrowed_ptr(py, main_ptr) }.cast_into(py)?;

    let main_dict = main.dict(py);

    let importlib_util = py.import("importlib.util")?;
    let spec = importlib_util.call(py, "find_spec", (name,), None)?;
    let loader = spec.getattr(py, "loader")?;
    let code = loader.call_method(py, "get_code", (name,), None)?;

    let origin = spec.getattr(py, "origin")?;
    let cached = spec.getattr(py, "cached")?;

    // TODO handle __package__.
    main_dict.set_item(py, "__name__", "__main__")?;
    main_dict.set_item(py, "__file__", origin)?;
    main_dict.set_item(py, "__cached__", cached)?;
    main_dict.set_item(py, "__doc__", py.None())?;
    main_dict.set_item(py, "__loader__", loader)?;
    main_dict.set_item(py, "__spec__", spec)?;

    unsafe {
        let globals = main_dict.as_object().as_ptr();
        let res = pyffi::PyEval_EvalCode(code.as_ptr(), globals, globals);

        if res.is_null() {
            // Printing the error first would clear it, and the fetch below
            // would then hand the caller an unrelated placeholder error.
            Err(PyErr::fetch(py))
        } else {
            Ok(PyObject::from_owned_ptr(py, res))
        }
    }
}
/// Launch an interactive Python REPL reading from standard input.
///
/// This mirrors what CPython's main.c does.
///
/// The interpreter is automatically initialized if needed.
pub fn run_repl(&mut self) -> PyResult<PyObject> {
    let py = self.acquire_gil();

    unsafe {
        pyffi::Py_InspectFlag = 0;
    }

    // readline is optional, so a failed import is simply ignored.
    let _ = py.import("readline");

    let sys = py.import("sys")?;

    if let Ok(hook) = sys.get(py, "__interactivehook__") {
        hook.call(py, NoArgs, None)?;
    }

    let filename = CString::new("<stdin>")
        .map_err(|_| PyErr::new::<ValueError, _>(py, "could not create CString"))?;

    let mut flags = pyffi::PyCompilerFlags { cf_flags: 0 };

    // TODO use return value.
    unsafe {
        pyffi::PyRun_AnyFileExFlags(
            stdin_to_file(),
            filename.as_ptr() as *const c_char,
            0,
            &mut flags,
        );
    }

    Ok(py.None())
}
/// Execute a string of Python source in the `__main__` module.
///
/// This is similar to what ``python -c <code>`` would do.
///
/// The interpreter is automatically initialized if needed.
pub fn run_code(&mut self, code: &str) -> PyResult<PyObject> {
    let py = self.acquire_gil();

    let source = CString::new(code).map_err(|_| {
        PyErr::new::<ValueError, _>(py, "source code is not a valid C string")
    })?;

    let result = unsafe {
        let main = pyffi::PyImport_AddModule("__main__\0".as_ptr() as *const _);
        if main.is_null() {
            return Err(PyErr::fetch(py));
        }

        // The module dict serves as both globals and locals.
        let namespace = pyffi::PyModule_GetDict(main);

        pyffi::PyRun_StringFlags(
            source.as_ptr() as *const _,
            pyffi::Py_file_input,
            namespace,
            namespace,
            std::ptr::null_mut(),
        )
    };

    if result.is_null() {
        return Err(PyErr::fetch(py));
    }

    Ok(unsafe { PyObject::from_owned_ptr(py, result) })
}
/// Print a Python error using the interpreter's own reporting.
///
/// Under the hood this calls ``PyErr_PrintEx()``, which may call
/// ``Py_Exit()`` and may write to stderr.
pub fn print_err(&mut self, err: PyErr) {
    err.print(self.acquire_gil());
}
}
/// Dump the names of all loaded Python modules into a file.
///
/// Creates `path` if needed, then writes a file named ``modules-<UUID>``
/// inside it containing the sorted keys of ``sys.modules``, one per line
/// (``\n`` delimited).
fn write_modules_to_directory(py: Python, path: &PathBuf) -> Result<(), &'static str> {
    // TODO this needs better error handling all over.
    fs::create_dir_all(path).map_err(|_| "could not create directory for modules")?;

    let file_name = format!("modules-{}", uuid::Uuid::new_v4());
    let destination = path.join(file_name);

    let sys = py.import("sys").map_err(|_| "could not obtain sys module")?;
    let modules_obj = sys
        .get(py, "modules")
        .map_err(|_| "could not obtain sys.modules")?;
    let modules = modules_obj
        .cast_as::<PyDict>(py)
        .map_err(|_| "sys.modules is not a dict")?;

    // A BTreeSet keeps the names sorted and unique.
    let names = modules
        .items(py)
        .into_iter()
        .map(|(key, _value)| {
            key.extract::<String>(py)
                .map_err(|_| "module name is not a str")
        })
        .collect::<Result<BTreeSet<String>, &'static str>>()?;

    let mut out = fs::File::create(destination).map_err(|_| "could not open file for writing")?;

    for name in names {
        out.write_fmt(format_args!("{}\n", name))
            .map_err(|_| "could not write")?;
    }

    Ok(())
}
impl<'a> Drop for MainPythonInterpreter<'a> {
    /// Optionally dump the loaded module names, then finalize the interpreter.
    ///
    /// The dump only happens when the config names an environment variable
    /// and that variable is set; its value is used as the output directory.
    fn drop(&mut self) {
        let dump_dir = self
            .config
            .write_modules_directory_env
            .as_ref()
            .and_then(|key| env::var(key).ok())
            .map(PathBuf::from);

        if let Some(dir) = dump_dir {
            let py = self.acquire_gil();

            if let Err(msg) = write_modules_to_directory(py, &dir) {
                eprintln!("error writing modules file: {}", msg);
            }
        }

        let _ = unsafe { pyffi::Py_FinalizeEx() };
    }
}

View file

@ -1,98 +0,0 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
//! Bridge Rust and Python string types.
use libc::{c_void, size_t, wchar_t};
use python3_sys as pyffi;
use std::ffi::{CString, OsString};
use std::ptr::null_mut;
#[cfg(target_family = "unix")]
use std::os::unix::ffi::OsStrExt;
#[cfg(target_family = "windows")]
use std::os::windows::prelude::OsStrExt;
use cpython::{PyObject, Python};
/// Owns a `wchar_t` string produced by `Py_DecodeLocale()`.
///
/// The buffer is released with `PyMem_RawFree()` when the value is dropped.
#[derive(Debug)]
pub struct OwnedPyStr {
/// Pointer returned by `Py_DecodeLocale()`; never null once constructed
/// through `from_str()`.
data: *const wchar_t,
}
impl OwnedPyStr {
    /// Borrow the underlying `wchar_t` pointer for passing to the C API.
    pub fn as_wchar_ptr(&self) -> *const wchar_t {
        self.data
    }

    /// Convert a Rust string into a Python-owned wide string.
    ///
    /// Fails if `s` contains an interior NUL byte or if
    /// `Py_DecodeLocale()` returns null.
    pub fn from_str(s: &str) -> Result<Self, &'static str> {
        // Py_DecodeLocale() needs a terminating NUL, otherwise it can get
        // confused, so go through a C string first.
        let c_source = CString::new(s).map_err(|_| "source string has NULL bytes")?;

        // A null size pointer means the decoded length is not reported.
        let decoded = unsafe { pyffi::Py_DecodeLocale(c_source.as_ptr(), null_mut::<size_t>()) };

        if decoded.is_null() {
            return Err("could not convert str to Python string");
        }

        Ok(OwnedPyStr { data: decoded })
    }
}
impl Drop for OwnedPyStr {
    /// Hand the buffer back to Python's raw allocator.
    fn drop(&mut self) {
        let buffer = self.data as *mut c_void;
        unsafe { pyffi::PyMem_RawFree(buffer) }
    }
}
#[cfg(target_family = "unix")]
const SURROGATEESCAPE: &[u8] = b"surrogateescape\0";
/// Convert an `OsString` into a Python `str` object (Unix).
///
/// The bytes are decoded with `PyUnicode_DecodeLocaleAndSize()` using the
/// `surrogateescape` error handler.
///
/// # Errors
///
/// Fails if the string contains an interior NUL byte or if Python cannot
/// decode it.
#[cfg(target_family = "unix")]
pub fn osstring_to_str(py: Python, s: OsString) -> Result<PyObject, &'static str> {
    // PyUnicode_DecodeLocaleAndSize says the input must have a trailing NULL.
    // So use a CString for that.
    let b = CString::new(s.as_bytes()).or_else(|_| Err("not a valid C string"))?;

    let o = unsafe {
        pyffi::PyUnicode_DecodeLocaleAndSize(
            b.as_ptr(),
            b.to_bytes().len() as isize,
            SURROGATEESCAPE.as_ptr() as *const _,
        )
    };

    // NULL signals a Python-level failure. It must not be wrapped in a
    // PyObject; clear the pending exception and report the failure.
    if o.is_null() {
        unsafe { pyffi::PyErr_Clear() };
        return Err("unable to decode OS string to Python str");
    }

    Ok(unsafe { PyObject::from_owned_ptr(py, o) })
}
/// Convert an `OsString` into a Python `str` object (Windows).
///
/// The string is re-encoded as UTF-16 code units and handed to
/// `PyUnicode_FromWideChar()`.
///
/// # Errors
///
/// Fails if Python cannot build the string object.
#[cfg(target_family = "windows")]
pub fn osstring_to_str(py: Python, s: OsString) -> Result<PyObject, &'static str> {
    // Windows OsString should be valid UTF-16.
    let w: Vec<u16> = s.encode_wide().collect();

    let o = unsafe { pyffi::PyUnicode_FromWideChar(w.as_ptr(), w.len() as isize) };

    // NULL signals a Python-level failure. It must not be wrapped in a
    // PyObject; clear the pending exception and report the failure.
    if o.is_null() {
        unsafe { pyffi::PyErr_Clear() };
        return Err("unable to convert OS string to Python str");
    }

    Ok(unsafe { PyObject::from_owned_ptr(py, o) })
}
/// Convert an `OsString` into a Python `bytes` object holding its raw bytes
/// (Unix).
#[cfg(target_family = "unix")]
pub fn osstring_to_bytes(py: Python, s: OsString) -> PyObject {
    let raw = s.as_bytes();
    let data = raw.as_ptr() as *const i8;
    let len = raw.len() as isize;

    unsafe { PyObject::from_owned_ptr(py, pyffi::PyBytes_FromStringAndSize(data, len)) }
}
/// Convert an `OsString` into a Python `bytes` object holding its UTF-16
/// code units (Windows).
#[cfg(target_family = "windows")]
pub fn osstring_to_bytes(py: Python, s: OsString) -> PyObject {
    let units: Vec<u16> = s.encode_wide().collect();
    let data = units.as_ptr() as *const i8;
    // Each UTF-16 code unit occupies two bytes.
    let byte_len = units.len() as isize * 2;

    unsafe { PyObject::from_owned_ptr(py, pyffi::PyBytes_FromStringAndSize(data, byte_len)) }
}

View file

@ -1,119 +0,0 @@
# This file controls the PyOxidizer build configuration. See the
# pyoxidizer crate's documentation for extensive documentation
# on this file format.
[[build]]
application_name = "insee_translator"
[[embedded_python_config]]
raw_allocator = "jemalloc"
# dont_write_bytecode = true
# ignore_environment = true
# no_site = true
# no_user_site_directory = true
# optimize_level = 0
# stdio_encoding = "utf-8:strict"
# unbuffered_stdio = false
#write_modules_directory_env = "PYOXIDIZER_WRITE_MODULES_DIR"
# Windows doesn't support jemalloc.
[[embedded_python_config]]
build_target = "x86_64-pc-windows-msvc"
raw_allocator = "system"
[[packaging_rule]]
type = "stdlib-extensions-policy"
# Package all available extension modules from the Python distribution.
# The Python interpreter will be fully featured.
policy = "all"
# Only package the minimal set of extension modules needed to initialize
# a Python interpreter. Many common packages in Python's standard
# library won't work with this setting.
# policy = "minimal"
# Only package extension modules that don't require linking against
# non-Python libraries. e.g. will exclude support for OpenSSL, SQLite3,
# other features that require external libraries.
# policy = "no-libraries"
# Explicit list of extension modules from the distribution to include.
# [[packaging_rule]]
# type = "stdlib-extensions-explicit-includes"
# includes = ["binascii", "errno", "itertools", "math", "select", "_socket"]
# Explicit list of extension modules from the distribution to exclude.
# [[packaging_rule]]
# type = "stdlib-extensions-explicit-excludes"
# excludes = ["_ssl"]
# Package the entire Python standard library without sources.
[[packaging_rule]]
type = "stdlib"
include_source = false
# Write out license files next to the produced binary.
[[packaging_rule]]
type = "write-license-files"
path = ""
# Package .py files discovered in a local directory.
[[packaging_rule]]
type = "package-root"
path = "."
packages = ["data", "main"]
# Package things from a populated virtualenv.
# [[packaging_rule]]
# type = "virtualenv"
# path = "/path/to/venv"
# Filter all resources collected so far through a filter of names
# in a file.
# [[packaging_rule]]
# type = "filter-include"
# files = ["/path/to/filter-file"]
# How Python should run by default. This is only needed if you
# call ``run()``. For applications customizing how the embedded
# Python interpreter is invoked, this section is not relevant.
[[embedded_python_run]]
# Run an interactive Python interpreter.
#mode = "repl"
# Import a Python module and run it.
mode = "module"
module = "main.main"
# Evaluate some Python code.
#mode = "eval"
#code = "import main; main.main()"
# END OF COMMON USER-ADJUSTED SETTINGS.
#
# Everything below this is typically managed by PyOxidizer and doesn't need
# to be updated by people.
[[python_distribution]]
build_target = "x86_64-apple-darwin"
url = "https://github.com/indygreg/python-build-standalone/releases/download/20190617/cpython-3.7.3-macos-20190618T0523.tar.zst"
sha256 = "6668202a3225892ce252eff4bb53a58ac058b6a413ab9d37c026a500c2a561ee"
[[python_distribution]]
build_target = "x86_64-pc-windows-msvc"
url = "https://github.com/indygreg/python-build-standalone/releases/download/20190617/cpython-3.7.3-windows-amd64-20190618T0516.tar.zst"
sha256 = "fd43554b5654a914846cf1c251d1ad366f46c7c4d20b7c44572251b533351221"
[[python_distribution]]
build_target = "x86_64-unknown-linux-gnu"
url = "https://github.com/indygreg/python-build-standalone/releases/download/20190617/cpython-3.7.3-linux64-20190618T0324.tar.zst"
sha256 = "d6b80a9723c124d6d193f8816fdb874ba6d56abfb35cbfcc2b27de53176d0620"
[[python_distribution]]
build_target = "x86_64-unknown-linux-musl"
url = "https://github.com/indygreg/python-build-standalone/releases/download/20190617/cpython-3.7.3-linux64-musl-20190618T0400.tar.zst"
sha256 = "2be2d109b82634b36685b89800887501b619ef946dda182e5a8ab5c7029a8136"
[[pyoxidizer]]
version = "0.2.0"
commit = ""

16
pyproject.toml Normal file
View file

@ -0,0 +1,16 @@
# Package metadata read by Poetry.
[tool.poetry]
name = "insee_number_translator"
version = "0.1.0"
description = "Translate french INSEE number to meaningful data"
authors = ["Gabriel Augendre <gabriel@augendre.info>"]
license = "MIT"
# Runtime requirements: only the Python interpreter version is constrained here.
[tool.poetry.dependencies]
python = "^3.9"
# Tooling that is needed for development only.
[tool.poetry.dev-dependencies]
pre-commit = "^2.13.0"
# Build backend declaration: the package is built with poetry-core.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

View file

@ -1,30 +0,0 @@
use pyembed::{default_python_config, MainPythonInterpreter};
/// Starts the embedded Python interpreter and exits the process with the
/// status code it produces.
fn main() {
    // Everything interpreter-related lives in this inner block so that the
    // MainPythonInterpreter is torn down in an orderly manner before the
    // process exits.
    let exit_code = {
        // Default Python configuration, as derived from the PyOxidizer config
        // file used at build time.
        let python_config = default_python_config();

        match MainPythonInterpreter::new(python_config) {
            // Interpreter construction failed: report why and signal failure.
            Err(reason) => {
                eprintln!("{}", reason);
                1
            }
            // Run with the default run configuration. Uncaught Python
            // exceptions are handled there, including the special SystemExit,
            // which is a request to terminate the process.
            Ok(mut interpreter) => interpreter.run_as_main(),
        }
    };

    // Exit the process according to the code execution results.
    std::process::exit(exit_code);
}