diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..541717a --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,33 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: check-ast + types: [python] + - id: check-json + types: [json] + - id: check-toml + types: [toml] + - id: check-xml + types: [xml] + - id: check-yaml + types: [yaml] + - id: end-of-file-fixer + - id: check-merge-conflict + - id: pretty-format-json + args: + - --autofix + - --no-sort-keys + - id: trailing-whitespace + args: + - --markdown-linebreak-ext=md + - repo: https://github.com/timothycrosley/isort + rev: 5.9.2 + hooks: + - id: isort + types: [python] + - repo: https://github.com/psf/black + rev: 21.6b0 + hooks: + - id: black + types: [python] diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index 38da0ba..0000000 --- a/Cargo.lock +++ /dev/null @@ -1,351 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -[[package]] -name = "aho-corasick" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "autocfg" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "bitflags" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "byteorder" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "cc" -version = "1.0.37" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "cfg-if" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "cloudabi" -version = "0.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "cpython" -version = "0.2.1" -source = "git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0#2093cc5ab9b29d7db2255a0df836d89e440754b6" -dependencies = [ - "libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "python3-sys 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)", -] - -[[package]] -name = "fs_extra" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "fuchsia-cprng" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "insee_translator" -version = "0.1.0" -dependencies = [ - "jemallocator-global 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "pyembed 0.2.0", -] - -[[package]] -name = "jemalloc-sys" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)", - "fs_extra 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "jemallocator" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "jemalloc-sys 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "jemallocator-global" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", - "jemallocator 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "lazy_static" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "libc" -version = "0.2.59" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "memchr" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "num-traits" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "pyembed" -version = "0.2.0" -dependencies = [ - "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "cpython 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)", - "jemalloc-sys 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)", - "python3-sys 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)", - "uuid 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "python3-sys" -version = "0.2.1" -source = "git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0#2093cc5ab9b29d7db2255a0df836d89e440754b6" -dependencies = [ - "libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.9 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rand" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_jitter 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rand_chacha" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rand_core" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rand_core" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "rand_hc" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rand_isaac" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rand_jitter" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rand_os" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", - "fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rand_pcg" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rand_xorshift" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "rdrand" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "regex" -version = "1.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "aho-corasick 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", - "utf8-ranges 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "regex-syntax" -version = "0.6.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "thread_local" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "ucd-util" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "utf8-ranges" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "uuid" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "winapi" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[metadata] -"checksum aho-corasick 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)" = "36b7aa1ccb7d7ea3f437cf025a2ab1c47cc6c1bc9fc84918ff449def12f5e282" -"checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf" -"checksum bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd" -"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" -"checksum cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)" = "39f75544d7bbaf57560d2168f28fd649ff9c76153874db88bdbdfd839b1a7e7d" -"checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33" -"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f" -"checksum cpython 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)" = "" -"checksum fs_extra 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5f2a4a2034423744d2cc7ca2068453168dcdb82c438419e639a26bd87839c674" -"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" -"checksum jemalloc-sys 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0d3b9f3f5c9b31aa0f5ed3260385ac205db665baa41d49bb8338008ae94ede45" -"checksum jemallocator 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "43ae63fcfc45e99ab3d1b29a46782ad679e98436c3169d15a167a1108a724b69" -"checksum jemallocator-global 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "991b61de8365c8b5707cf6cabbff98cfd6eaca9b851948b883efea408c7f581e" -"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14" -"checksum libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)" = "3262021842bf00fe07dbd6cf34ff25c99d7a7ebef8deea84db72be3ea3bb0aff" -"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" -"checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32" -"checksum python3-sys 0.2.1 (git+https://github.com/indygreg/PyOxidizer.git?tag=v0.2.0)" = "" -"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca" -"checksum rand_chacha 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "556d3a1ca6600bfcbab7c7c91ccb085ac7fbbcd70e008a98742e7847f4f7bcef" -"checksum rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" -"checksum rand_core 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d0e7a549d590831370895ab7ba4ea0c1b6b011d106b5ff2da6eee112615e6dc0" -"checksum rand_hc 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b40677c7be09ae76218dc623efbf7b18e34bced3f38883af07bb75630a21bc4" -"checksum rand_isaac 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ded997c9d5f13925be2a6fd7e66bf1872597f759fd9dd93513dd7e92e5a5ee08" -"checksum rand_jitter 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b" -"checksum rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071" -"checksum rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44" -"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c" -"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -"checksum regex 1.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "d9d8297cc20bbb6184f8b45ff61c8ee6a9ac56c156cec8e38c3e5084773c44ad" -"checksum regex-syntax 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "9b01330cce219c1c6b2e209e5ed64ccd587ae5c67bed91c0b49eecf02ae40e21" -"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" -"checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" -"checksum utf8-ranges 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "9d50aa7650df78abf942826607c62468ce18d9019673d4a2ebe1865dbb96ffde" -"checksum uuid 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)" = "90dbc611eb48397705a6b0f6e917da23ae517e4d127123d2cf7674206627d32a" -"checksum winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f10e386af2b13e47c89e7236a7a14a086791a2b88ebad6df9bf42040195cf770" -"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml deleted file mode 100644 index 2762665..0000000 --- a/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "insee_translator" -version = "0.1.0" -authors = ["Gabriel Augendre "] -edition = "2018" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -jemallocator-global = { version = "0.3", optional = true } -pyembed = { path = "pyembed" } - -[features] -default = [] -jemalloc = ["jemallocator-global", "pyembed/jemalloc"] diff --git a/Pipfile b/Pipfile deleted file mode 100644 index 61a1960..0000000 --- a/Pipfile +++ /dev/null @@ -1,15 +0,0 @@ -[[source]] -name = "pypi" -url = "https://pypi.org/simple" -verify_ssl = true - -[dev-packages] - -[packages] -black = "*" - -[requires] -python_version = "3.7" - -[pipenv] -allow_prereleases = true diff --git a/Pipfile.lock b/Pipfile.lock deleted file mode 100644 index 6ba82ab..0000000 --- a/Pipfile.lock +++ /dev/null @@ -1,57 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "b132de3bc6e041e3fa5ab7a0feb2ee862f488ae8903790188641b70b5e595abd" - }, - "pipfile-spec": 6, - "requires": { - "python_version": "3.7" - }, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.org/simple", - "verify_ssl": true - } - ] - }, - "default": { - "appdirs": { - "hashes": [ - "sha256:9e5896d1372858f8dd3344faf4e5014d21849c756c8d5701f78f8a103b372d92", - "sha256:d8b24664561d0d34ddfaec54636d502d7cea6e29c3eaf68f3df6180863e2166e" - ], - "version": "==1.4.3" - }, - "attrs": { - "hashes": [ - "sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79", - "sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399" - ], - "version": "==19.1.0" - }, - "black": { - "hashes": [ - "sha256:09a9dcb7c46ed496a9850b76e4e825d6049ecd38b611f1224857a79bd985a8cf", - "sha256:68950ffd4d9169716bcb8719a56c07a2f4485354fec061cdd5910aa07369731c" - ], - "index": "pypi", - "version": "==19.3b0" - }, - "click": { - "hashes": [ - "sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13", - "sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7" - ], - "version": "==7.0" - }, - "toml": { - "hashes": [ - "sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c", - "sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e" - ], - "version": "==0.10.0" - } - }, - "develop": {} -} diff --git a/README.md b/README.md index 267f5d1..710470f 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,16 @@ Extract data from INSEE number (France) +## Getting started + +```shell +pyenv virtualenv 3.9.6 insee +pyenv local insee +poetry install +python insee_number_translator/main.py +python insee_number_translator/main.py 123456789123456 +``` + ## Data sources - cities : https://public.opendatasoft.com/explore/dataset/correspondance-code-insee-code-postal/export/ diff --git a/import/__init__.py b/import/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/import/convert_cities.py b/import/convert_cities.py deleted file mode 100644 index c9ec2a9..0000000 --- a/import/convert_cities.py +++ /dev/null @@ -1,29 +0,0 @@ -# This expects a file named "correspondance-code-insee-code-postal.json" -# You can find one here : https://public.opendatasoft.com/explore/dataset/correspondance-code-insee-code-postal/export/ - -import json - -with open("correspondance-code-insee-code-postal.json", "r") as f: - data = json.load(f) - mapping = dict() - failed = [] - duplicates = [] - for base_item in data: - item = base_item.get("fields") - if not item: - failed.append(base_item) - insee_com = item.get("insee_com") - if insee_com: - if insee_com in mapping: - duplicates.append(base_item) - mapping[insee_com] = { - "name": item.get("nom_comm"), - "zip_code": item.get("postal_code"), - } - else: - failed.append(base_item) - import pprint - - with open("../data/cities.py", "w") as writef: - writef.write(pprint.pformat(mapping, indent=4, compact=True)) - writef.flush() diff --git a/__init__.py b/insee_number_translator/__init__.py similarity index 100% rename from __init__.py rename to insee_number_translator/__init__.py diff --git a/data/__init__.py b/insee_number_translator/data/__init__.py similarity index 100% rename from data/__init__.py rename to insee_number_translator/data/__init__.py diff --git a/data/cities.py b/insee_number_translator/data/cities.py similarity index 100% rename from data/cities.py rename to insee_number_translator/data/cities.py diff --git a/data/countries.py b/insee_number_translator/data/countries.py similarity index 100% rename from data/countries.py rename to insee_number_translator/data/countries.py diff --git a/data/departments.py b/insee_number_translator/data/departments.py similarity index 100% rename from data/departments.py rename to insee_number_translator/data/departments.py diff --git a/main/main.py b/insee_number_translator/main.py similarity index 98% rename from main/main.py rename to insee_number_translator/main.py index a53b8bd..cd82dcf 100644 --- a/main/main.py +++ b/insee_number_translator/main.py @@ -3,9 +3,9 @@ import datetime import pprint import sys -from data.departments import DEPARTMENTS from data.cities import CITIES -from data.countries import COUNTRIES, CONTINENTS +from data.countries import CONTINENTS, COUNTRIES +from data.departments import DEPARTMENTS class InseeData: @@ -159,8 +159,8 @@ def main(): data = InseeData(number) print(data) pprint.pprint(data.to_dict()) - print('\n\n') + print("\n\n") -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/main/__init__.py b/main/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..5f8c836 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,207 @@ +[[package]] +name = "backports.entry-points-selectable" +version = "1.1.0" +description = "Compatibility shim providing selectable entry points for older implementations" +category = "dev" +optional = false +python-versions = ">=2.7" + +[package.extras] +docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] +testing = ["pytest (>=4.6)", "pytest-flake8", "pytest-cov", "pytest-black (>=0.3.7)", "pytest-mypy", "pytest-checkdocs (>=2.4)", "pytest-enabler (>=1.0.1)"] + +[[package]] +name = "cfgv" +version = "3.3.0" +description = "Validate configuration and produce human readable error messages." +category = "dev" +optional = false +python-versions = ">=3.6.1" + +[[package]] +name = "distlib" +version = "0.3.2" +description = "Distribution utilities" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "filelock" +version = "3.0.12" +description = "A platform independent file lock." +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "identify" +version = "2.2.12" +description = "File identification library for Python" +category = "dev" +optional = false +python-versions = ">=3.6.1" + +[package.extras] +license = ["editdistance-s"] + +[[package]] +name = "nodeenv" +version = "1.6.0" +description = "Node.js virtual environment builder" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "platformdirs" +version = "2.2.0" +description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.extras] +docs = ["Sphinx (>=4)", "furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)"] +test = ["appdirs (==1.4.4)", "pytest (>=6)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)"] + +[[package]] +name = "pre-commit" +version = "2.13.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +category = "dev" +optional = false +python-versions = ">=3.6.1" + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +toml = "*" +virtualenv = ">=20.0.8" + +[[package]] +name = "pyyaml" +version = "5.4.1" +description = "YAML parser and emitter for Python" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +category = "dev" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "virtualenv" +version = "20.7.0" +description = "Virtual Python Environment builder" +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" + +[package.dependencies] +"backports.entry-points-selectable" = ">=1.0.4" +distlib = ">=0.3.1,<1" +filelock = ">=3.0.0,<4" +platformdirs = ">=2,<3" +six = ">=1.9.0,<2" + +[package.extras] +docs = ["proselint (>=0.10.2)", "sphinx (>=3)", "sphinx-argparse (>=0.2.5)", "sphinx-rtd-theme (>=0.4.3)", "towncrier (>=19.9.0rc1)"] +testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", "pytest (>=4)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.1)", "pytest-mock (>=2)", "pytest-randomly (>=1)", "pytest-timeout (>=1)", "packaging (>=20.0)"] + +[metadata] +lock-version = "1.1" +python-versions = "^3.9" +content-hash = "c1cc0f0c13c0a4f97f0629dcf8460e2022e73b90e77bd99d1a3370815da0a11a" + +[metadata.files] +"backports.entry-points-selectable" = [ + {file = "backports.entry_points_selectable-1.1.0-py2.py3-none-any.whl", hash = "sha256:a6d9a871cde5e15b4c4a53e3d43ba890cc6861ec1332c9c2428c92f977192acc"}, + {file = "backports.entry_points_selectable-1.1.0.tar.gz", hash = "sha256:988468260ec1c196dab6ae1149260e2f5472c9110334e5d51adcb77867361f6a"}, +] +cfgv = [ + {file = "cfgv-3.3.0-py2.py3-none-any.whl", hash = "sha256:b449c9c6118fe8cca7fa5e00b9ec60ba08145d281d52164230a69211c5d597a1"}, + {file = "cfgv-3.3.0.tar.gz", hash = "sha256:9e600479b3b99e8af981ecdfc80a0296104ee610cab48a5ae4ffd0b668650eb1"}, +] +distlib = [ + {file = "distlib-0.3.2-py2.py3-none-any.whl", hash = "sha256:23e223426b28491b1ced97dc3bbe183027419dfc7982b4fa2f05d5f3ff10711c"}, + {file = "distlib-0.3.2.zip", hash = "sha256:106fef6dc37dd8c0e2c0a60d3fca3e77460a48907f335fa28420463a6f799736"}, +] +filelock = [ + {file = "filelock-3.0.12-py3-none-any.whl", hash = "sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836"}, + {file = "filelock-3.0.12.tar.gz", hash = "sha256:18d82244ee114f543149c66a6e0c14e9c4f8a1044b5cdaadd0f82159d6a6ff59"}, +] +identify = [ + {file = "identify-2.2.12-py2.py3-none-any.whl", hash = "sha256:a510cbe155f39665625c8a4c4b4f9360cbce539f51f23f47836ab7dd852db541"}, + {file = "identify-2.2.12.tar.gz", hash = "sha256:242332b3bdd45a8af1752d5d5a3afb12bee26f8e67c4be06e394f82d05ef1a4d"}, +] +nodeenv = [ + {file = "nodeenv-1.6.0-py2.py3-none-any.whl", hash = "sha256:621e6b7076565ddcacd2db0294c0381e01fd28945ab36bcf00f41c5daf63bef7"}, + {file = "nodeenv-1.6.0.tar.gz", hash = "sha256:3ef13ff90291ba2a4a7a4ff9a979b63ffdd00a464dbe04acf0ea6471517a4c2b"}, +] +platformdirs = [ + {file = "platformdirs-2.2.0-py3-none-any.whl", hash = "sha256:4666d822218db6a262bdfdc9c39d21f23b4cfdb08af331a81e92751daf6c866c"}, + {file = "platformdirs-2.2.0.tar.gz", hash = "sha256:632daad3ab546bd8e6af0537d09805cec458dce201bccfe23012df73332e181e"}, +] +pre-commit = [ + {file = "pre_commit-2.13.0-py2.py3-none-any.whl", hash = "sha256:b679d0fddd5b9d6d98783ae5f10fd0c4c59954f375b70a58cbe1ce9bcf9809a4"}, + {file = "pre_commit-2.13.0.tar.gz", hash = "sha256:764972c60693dc668ba8e86eb29654ec3144501310f7198742a767bec385a378"}, +] +pyyaml = [ + {file = "PyYAML-5.4.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:3b2b1824fe7112845700f815ff6a489360226a5609b96ec2190a45e62a9fc922"}, + {file = "PyYAML-5.4.1-cp27-cp27m-win32.whl", hash = "sha256:129def1b7c1bf22faffd67b8f3724645203b79d8f4cc81f674654d9902cb4393"}, + {file = "PyYAML-5.4.1-cp27-cp27m-win_amd64.whl", hash = "sha256:4465124ef1b18d9ace298060f4eccc64b0850899ac4ac53294547536533800c8"}, + {file = "PyYAML-5.4.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:bb4191dfc9306777bc594117aee052446b3fa88737cd13b7188d0e7aa8162185"}, + {file = "PyYAML-5.4.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:6c78645d400265a062508ae399b60b8c167bf003db364ecb26dcab2bda048253"}, + {file = "PyYAML-5.4.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:4e0583d24c881e14342eaf4ec5fbc97f934b999a6828693a99157fde912540cc"}, + {file = "PyYAML-5.4.1-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:72a01f726a9c7851ca9bfad6fd09ca4e090a023c00945ea05ba1638c09dc3347"}, + {file = "PyYAML-5.4.1-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:895f61ef02e8fed38159bb70f7e100e00f471eae2bc838cd0f4ebb21e28f8541"}, + {file = "PyYAML-5.4.1-cp36-cp36m-win32.whl", hash = "sha256:3bd0e463264cf257d1ffd2e40223b197271046d09dadf73a0fe82b9c1fc385a5"}, + {file = "PyYAML-5.4.1-cp36-cp36m-win_amd64.whl", hash = "sha256:e4fac90784481d221a8e4b1162afa7c47ed953be40d31ab4629ae917510051df"}, + {file = "PyYAML-5.4.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5accb17103e43963b80e6f837831f38d314a0495500067cb25afab2e8d7a4018"}, + {file = "PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e1d4970ea66be07ae37a3c2e48b5ec63f7ba6804bdddfdbd3cfd954d25a82e63"}, + {file = "PyYAML-5.4.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:cb333c16912324fd5f769fff6bc5de372e9e7a202247b48870bc251ed40239aa"}, + {file = "PyYAML-5.4.1-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:fe69978f3f768926cfa37b867e3843918e012cf83f680806599ddce33c2c68b0"}, + {file = "PyYAML-5.4.1-cp37-cp37m-win32.whl", hash = "sha256:dd5de0646207f053eb0d6c74ae45ba98c3395a571a2891858e87df7c9b9bd51b"}, + {file = "PyYAML-5.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:08682f6b72c722394747bddaf0aa62277e02557c0fd1c42cb853016a38f8dedf"}, + {file = "PyYAML-5.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d2d9808ea7b4af864f35ea216be506ecec180628aced0704e34aca0b040ffe46"}, + {file = "PyYAML-5.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:8c1be557ee92a20f184922c7b6424e8ab6691788e6d86137c5d93c1a6ec1b8fb"}, + {file = "PyYAML-5.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:fd7f6999a8070df521b6384004ef42833b9bd62cfee11a09bda1079b4b704247"}, + {file = "PyYAML-5.4.1-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:bfb51918d4ff3d77c1c856a9699f8492c612cde32fd3bcd344af9be34999bfdc"}, + {file = "PyYAML-5.4.1-cp38-cp38-win32.whl", hash = "sha256:fa5ae20527d8e831e8230cbffd9f8fe952815b2b7dae6ffec25318803a7528fc"}, + {file = "PyYAML-5.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:0f5f5786c0e09baddcd8b4b45f20a7b5d61a7e7e99846e3c799b05c7c53fa696"}, + {file = "PyYAML-5.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:294db365efa064d00b8d1ef65d8ea2c3426ac366c0c4368d930bf1c5fb497f77"}, + {file = "PyYAML-5.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:74c1485f7707cf707a7aef42ef6322b8f97921bd89be2ab6317fd782c2d53183"}, + {file = "PyYAML-5.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:d483ad4e639292c90170eb6f7783ad19490e7a8defb3e46f97dfe4bacae89122"}, + {file = "PyYAML-5.4.1-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:fdc842473cd33f45ff6bce46aea678a54e3d21f1b61a7750ce3c498eedfe25d6"}, + {file = "PyYAML-5.4.1-cp39-cp39-win32.whl", hash = "sha256:49d4cdd9065b9b6e206d0595fee27a96b5dd22618e7520c33204a4a3239d5b10"}, + {file = "PyYAML-5.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:c20cfa2d49991c8b4147af39859b167664f2ad4561704ee74c1de03318e898db"}, + {file = "PyYAML-5.4.1.tar.gz", hash = "sha256:607774cbba28732bfa802b54baa7484215f530991055bb562efbed5b2f20a45e"}, +] +six = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] +toml = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] +virtualenv = [ + {file = "virtualenv-20.7.0-py2.py3-none-any.whl", hash = "sha256:fdfdaaf0979ac03ae7f76d5224a05b58165f3c804f8aa633f3dd6f22fbd435d5"}, + {file = "virtualenv-20.7.0.tar.gz", hash = "sha256:97066a978431ec096d163e72771df5357c5c898ffdd587048f45e0aecc228094"}, +] diff --git a/pyembed/Cargo.toml b/pyembed/Cargo.toml deleted file mode 100644 index 93d209e..0000000 --- a/pyembed/Cargo.toml +++ /dev/null @@ -1,25 +0,0 @@ -[package] -name = "pyembed" -version = "0.2.0" -authors = ["Gregory Szorc "] -edition = "2018" -build = "build.rs" - -[dependencies] -byteorder = "1" -jemalloc-sys = { version = "0.3", optional = true } -libc = "0.2" -uuid = { version = "0.7", features = ["v4"] } - -[dependencies.python3-sys] -git = "https://github.com/indygreg/PyOxidizer.git" -tag = "v0.2.0" - -[dependencies.cpython] -git = "https://github.com/indygreg/PyOxidizer.git" -tag = "v0.2.0" -features = ["link-mode-unresolved-static", "python3-sys", "no-auto-initialize"] - -[features] -default = [] -jemalloc = ["jemalloc-sys"] diff --git a/pyembed/build.rs b/pyembed/build.rs deleted file mode 100644 index f1d2ef5..0000000 --- a/pyembed/build.rs +++ /dev/null @@ -1,65 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use std::env; -use std::path::PathBuf; -use std::process; - -/// Path to pyoxidizer executable this file was created with. -const DEFAULT_PYOXIDIZER_EXE: &str = r#"/Users/gaugendre/.cargo/bin/pyoxidizer"#; - -fn main() { - // We support using pre-built artifacts, in which case we emit the - // cargo metadata lines from the "original" build to "register" the - // artifacts with this cargo invocation. - if env::var("PYOXIDIZER_REUSE_ARTIFACTS").is_ok() { - let artifact_dir_env = env::var("PYOXIDIZER_ARTIFACT_DIR"); - - let artifact_dir_path = match artifact_dir_env { - Ok(ref v) => PathBuf::from(v), - Err(_) => { - let out_dir = env::var("OUT_DIR").unwrap(); - PathBuf::from(&out_dir) - } - }; - - println!( - "using pre-built artifacts from {}", - artifact_dir_path.display() - ); - - println!("cargo:rerun-if-env-changed=PYOXIDIZER_REUSE_ARTIFACTS"); - println!("cargo:rerun-if-env-changed=PYOXIDIZER_ARTIFACT_DIR"); - - // Emit the cargo metadata lines to register libraries for linking. - let cargo_metadata_path = artifact_dir_path.join("cargo_metadata.txt"); - let metadata = std::fs::read_to_string(&cargo_metadata_path) - .expect(format!("failed to read {}", cargo_metadata_path.display()).as_str()); - println!("{}", metadata); - } else { - let pyoxidizer_exe = match env::var("PYOXIDIZER_EXE") { - Ok(value) => value, - Err(_) => DEFAULT_PYOXIDIZER_EXE.to_string(), - }; - - let pyoxidizer_path = PathBuf::from(&pyoxidizer_exe); - - if !pyoxidizer_path.exists() { - panic!("pyoxidizer executable does not exist: {}", &pyoxidizer_exe); - } - - match process::Command::new(&pyoxidizer_exe) - .arg("run-build-script") - .arg("build.rs") - .status() - { - Ok(status) => { - if !status.success() { - panic!("`pyoxidizer run-build-script` failed"); - } - } - Err(e) => panic!("`pyoxidizer run-build-script` failed: {}", e.to_string()), - } - } -} diff --git a/pyembed/src/config.rs b/pyembed/src/config.rs deleted file mode 100644 index 5a93bcc..0000000 --- a/pyembed/src/config.rs +++ /dev/null @@ -1,118 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Data structures for configuring a Python interpreter. - -/// Defines which allocator to use for the raw domain. -#[derive(Clone, Debug)] -pub enum PythonRawAllocator { - /// Use jemalloc. - Jemalloc, - /// Use the Rust global allocator. - Rust, - /// Use the system allocator. - System, -} - -/// Defines Python code to run. -#[derive(Clone, Debug)] -pub enum PythonRunMode { - /// No-op. - None, - /// Run a Python REPL. - Repl, - /// Run a Python module as the main module. - Module { module: String }, - /// Evaluate Python code from a string. - Eval { code: String }, -} - -/// Holds the configuration of an embedded Python interpreter. -/// -/// Instances of this struct can be used to construct Python interpreters. -/// -/// Each instance contains the total state to define the run-time behavior of -/// a Python interpreter. -#[derive(Clone, Debug)] -pub struct PythonConfig { - /// Name of the current program to tell to Python. - pub program_name: String, - - /// Name of encoding for stdio handles. - pub standard_io_encoding: Option, - - /// Name of encoding error mode for stdio handles. - pub standard_io_errors: Option, - - /// Python optimization level. - pub opt_level: i32, - - /// Whether to load our custom frozen importlib bootstrap modules. - pub use_custom_importlib: bool, - - /// Whether to load the filesystem-based sys.meta_path finder. - pub filesystem_importer: bool, - - /// Filesystem paths to add to sys.path. - /// - /// ``$ORIGIN`` will resolve to the directory of the application at - /// run-time. - pub sys_paths: Vec, - - /// Whether to load the site.py module at initialization time. - pub import_site: bool, - - /// Whether to load a user-specific site module at initialization time. - pub import_user_site: bool, - - /// Whether to ignore various PYTHON* environment variables. - pub ignore_python_env: bool, - - /// Whether to suppress writing of ``.pyc`` files when importing ``.py`` - /// files from the filesystem. This is typically irrelevant since modules - /// are imported from memory. - pub dont_write_bytecode: bool, - - /// Whether stdout and stderr streams should be unbuffered. - pub unbuffered_stdio: bool, - - /// Bytecode for the importlib._bootstrap / _frozen_importlib module. - pub frozen_importlib_data: &'static [u8], - - /// Bytecode for the importlib._bootstrap_external / _frozen_importlib_external module. - pub frozen_importlib_external_data: &'static [u8], - - /// Reference to raw Python modules data. - /// - /// The referenced data is produced as part of PyOxidizer packaging. This - /// likely comes from an include_bytes!(...) of a file generated by PyOxidizer. - pub py_modules_data: &'static [u8], - - /// Reference to raw Python resources data. - /// - /// The referenced data is produced as part of PyOxidizer packaging. This - /// likely comes from an include_bytes!(...) of a file generated by PyOxidizer. - pub py_resources_data: &'static [u8], - - /// Whether to set sys.argvb with bytes versions of process arguments. - /// - /// On Windows, bytes will be UTF-16. On POSIX, bytes will be raw char* - /// values passed to `int main()`. - pub argvb: bool, - - /// Which memory allocator to use for the raw domain. - pub raw_allocator: PythonRawAllocator, - - /// Environment variable holding the directory to write a loaded modules file. - /// - /// If this value is set and the environment it refers to is set, - /// on interpreter shutdown, we will write a ``modules-`` file to - /// the directory specified containing a ``\n`` delimited list of modules - /// loaded in ``sys.modules``. - pub write_modules_directory_env: Option, - - /// Defines what code to run by default. - /// - pub run: PythonRunMode, -} diff --git a/pyembed/src/data.rs b/pyembed/src/data.rs deleted file mode 100644 index 5be977b..0000000 --- a/pyembed/src/data.rs +++ /dev/null @@ -1,5 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -include!(env!("PYEMBED_DATA_RS_PATH")); diff --git a/pyembed/src/importer.rs b/pyembed/src/importer.rs deleted file mode 100644 index 414b0d8..0000000 --- a/pyembed/src/importer.rs +++ /dev/null @@ -1,892 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -/*! -Functionality for a Python importer. - -This module defines a Python meta path importer and associated functionality -for importing Python modules from memory. -*/ - -use std::cell::RefCell; -use std::collections::{HashMap, HashSet}; -use std::ffi::CStr; -use std::io::Cursor; -use std::sync::Arc; - -use byteorder::{LittleEndian, ReadBytesExt}; -use cpython::exc::{FileNotFoundError, ImportError, RuntimeError, ValueError}; -use cpython::{ - py_class, py_class_impl, py_coerce_item, py_fn, NoArgs, ObjectProtocol, PyClone, PyDict, PyErr, - PyList, PyModule, PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject, -}; -use python3_sys as pyffi; -use python3_sys::{PyBUF_READ, PyMemoryView_FromMemory}; - -use super::pyinterp::PYOXIDIZER_IMPORTER_NAME; - -/// Obtain a Python memoryview referencing a memory slice. -/// -/// New memoryview allows Python to access the underlying memory without -/// copying it. -#[inline] -fn get_memory_view(py: Python, data: &'static [u8]) -> Option { - let ptr = unsafe { PyMemoryView_FromMemory(data.as_ptr() as _, data.len() as _, PyBUF_READ) }; - unsafe { PyObject::from_owned_ptr_opt(py, ptr) } -} - -/// Holds pointers to Python module data in memory. -#[derive(Debug)] -struct PythonModuleData { - source: Option<&'static [u8]>, - bytecode: Option<&'static [u8]>, -} - -impl PythonModuleData { - /// Obtain a PyMemoryView instance for source data. - fn get_source_memory_view(&self, py: Python) -> Option { - match self.source { - Some(data) => get_memory_view(py, data), - None => None, - } - } - - /// Obtain a PyMemoryView instance for bytecode data. - fn get_bytecode_memory_view(&self, py: Python) -> Option { - match self.bytecode { - Some(data) => get_memory_view(py, data), - None => None, - } - } -} - -/// Represents Python modules data in memory. -/// -/// This is essentially an index over a raw backing blob. -struct PythonModulesData { - data: HashMap<&'static str, PythonModuleData>, -} - -impl PythonModulesData { - /// Construct a new instance from a memory slice. - fn from(data: &'static [u8]) -> Result { - let mut reader = Cursor::new(data); - - let count = reader - .read_u32::() - .or_else(|_| Err("failed reading count"))?; - - let mut index = Vec::with_capacity(count as usize); - let mut total_names_length = 0; - let mut total_sources_length = 0; - - for _ in 0..count { - let name_length = reader - .read_u32::() - .or_else(|_| Err("failed reading name length"))? - as usize; - let source_length = reader - .read_u32::() - .or_else(|_| Err("failed reading source length"))? - as usize; - let bytecode_length = reader - .read_u32::() - .or_else(|_| Err("failed reading bytecode length"))? - as usize; - - index.push((name_length, source_length, bytecode_length)); - total_names_length += name_length; - total_sources_length += source_length; - } - - let mut res = HashMap::with_capacity(count as usize); - let sources_start_offset = reader.position() as usize + total_names_length; - let bytecodes_start_offset = sources_start_offset + total_sources_length; - - let mut sources_current_offset: usize = 0; - let mut bytecodes_current_offset: usize = 0; - - for (name_length, source_length, bytecode_length) in index { - let offset = reader.position() as usize; - - let name = - unsafe { std::str::from_utf8_unchecked(&data[offset..offset + name_length]) }; - - let source_offset = sources_start_offset + sources_current_offset; - let source = if source_length > 0 { - Some(&data[source_offset..source_offset + source_length]) - } else { - None - }; - - let bytecode_offset = bytecodes_start_offset + bytecodes_current_offset; - let bytecode = if bytecode_length > 0 { - Some(&data[bytecode_offset..bytecode_offset + bytecode_length]) - } else { - None - }; - - reader.set_position(offset as u64 + name_length as u64); - - sources_current_offset += source_length; - bytecodes_current_offset += bytecode_length; - - res.insert(name, PythonModuleData { source, bytecode }); - } - - Ok(PythonModulesData { data: res }) - } -} - -/// Represents Python resources data in memory. -/// -/// This is essentially an index over a raw backing blob. -struct PythonResourcesData { - packages: HashMap<&'static str, Arc>>>, -} - -impl PythonResourcesData { - fn from(data: &'static [u8]) -> Result { - let mut reader = Cursor::new(data); - - let package_count = reader - .read_u32::() - .or_else(|_| Err("failed reading package count"))? as usize; - - let mut index = Vec::with_capacity(package_count); - let mut total_names_length = 0; - - for _ in 0..package_count { - let package_name_length = reader - .read_u32::() - .or_else(|_| Err("failed reading package name length"))? - as usize; - let resource_count = reader - .read_u32::() - .or_else(|_| Err("failed reading resource count"))? - as usize; - - total_names_length += package_name_length; - - let mut package_index = Vec::with_capacity(resource_count); - - for _ in 0..resource_count { - let resource_name_length = reader - .read_u32::() - .or_else(|_| Err("failed reading resource name length"))? - as usize; - let resource_data_length = reader - .read_u32::() - .or_else(|_| Err("failed reading resource data length"))? - as usize; - - total_names_length += resource_name_length; - - package_index.push((resource_name_length, resource_data_length)); - } - - index.push((package_name_length, package_index)); - } - - let mut name_offset = reader.position() as usize; - let data_offset = name_offset + total_names_length; - let mut res = HashMap::new(); - - for (package_name_length, package_index) in index { - let package_name = unsafe { - std::str::from_utf8_unchecked(&data[name_offset..name_offset + package_name_length]) - }; - - name_offset += package_name_length; - - let mut package_data = Box::new(HashMap::new()); - - for (resource_name_length, resource_data_length) in package_index { - let resource_name = unsafe { - std::str::from_utf8_unchecked( - &data[name_offset..name_offset + resource_name_length], - ) - }; - - name_offset += resource_name_length; - - let resource_data = &data[data_offset..data_offset + resource_data_length]; - - package_data.insert(resource_name, resource_data); - } - - res.insert(package_name, Arc::new(package_data)); - } - - Ok(PythonResourcesData { packages: res }) - } -} - -#[allow(unused_doc_comments)] -/// Python type to import modules. -/// -/// This type implements the importlib.abc.MetaPathFinder interface for -/// finding/loading modules. It supports loading various flavors of modules, -/// allowing it to be the only registered sys.meta_path importer. -py_class!(class PyOxidizerFinder |py| { - data imp_module: PyModule; - data marshal_loads: PyObject; - data builtin_importer: PyObject; - data frozen_importer: PyObject; - data call_with_frames_removed: PyObject; - data module_spec_type: PyObject; - data decode_source: PyObject; - data exec_fn: PyObject; - data packages: HashSet<&'static str>; - data known_modules: KnownModules; - data resources: HashMap<&'static str, Arc>>>; - data resource_readers: RefCell>>; - - // Start of importlib.abc.MetaPathFinder interface. - - def find_spec(&self, fullname: &PyString, path: &PyObject, target: Option = None) -> PyResult { - let key = fullname.to_string(py)?; - - if let Some(flavor) = self.known_modules(py).get(&*key) { - match flavor { - KnownModuleFlavor::Builtin => { - // BuiltinImporter.find_spec() always returns None if `path` is defined. - // And it doesn't use `target`. So don't proxy these values. - self.builtin_importer(py).call_method(py, "find_spec", (fullname,), None) - } - KnownModuleFlavor::Frozen => { - self.frozen_importer(py).call_method(py, "find_spec", (fullname, path, target), None) - } - KnownModuleFlavor::InMemory { .. } => { - let is_package = self.packages(py).contains(&*key); - - // TODO consider setting origin and has_location so __file__ will be - // populated. - - let kwargs = PyDict::new(py); - kwargs.set_item(py, "is_package", is_package)?; - - self.module_spec_type(py).call(py, (fullname, self), Some(&kwargs)) - } - } - } else { - Ok(py.None()) - } - } - - def find_module(&self, _fullname: &PyObject, _path: &PyObject) -> PyResult { - // Method is deprecated. Always returns None. - // We /could/ call find_spec(). Meh. - Ok(py.None()) - } - - def invalidate_caches(&self) -> PyResult { - Ok(py.None()) - } - - // End of importlib.abc.MetaPathFinder interface. - - // Start of importlib.abc.Loader interface. - - def create_module(&self, _spec: &PyObject) -> PyResult { - Ok(py.None()) - } - - def exec_module(&self, module: &PyObject) -> PyResult { - let name = module.getattr(py, "__name__")?; - let key = name.extract::(py)?; - - if let Some(flavor) = self.known_modules(py).get(&*key) { - match flavor { - KnownModuleFlavor::Builtin => { - self.builtin_importer(py).call_method(py, "exec_module", (module,), None) - }, - KnownModuleFlavor::Frozen => { - self.frozen_importer(py).call_method(py, "exec_module", (module,), None) - }, - KnownModuleFlavor::InMemory { module_data } => { - match module_data.get_bytecode_memory_view(py) { - Some(value) => { - let code = self.marshal_loads(py).call(py, (value,), None)?; - let exec_fn = self.exec_fn(py); - let dict = module.getattr(py, "__dict__")?; - - self.call_with_frames_removed(py).call(py, (exec_fn, code, dict), None) - }, - None => { - Err(PyErr::new::(py, ("cannot find code in memory", name))) - } - } - }, - } - } else { - // Raising here might make more sense, as exec_module() shouldn't - // be called on the Loader that didn't create the module. - Ok(py.None()) - } - } - - // End of importlib.abc.Loader interface. - - // Start of importlib.abc.InspectLoader interface. - - def get_code(&self, fullname: &PyString) -> PyResult { - let key = fullname.to_string(py)?; - - if let Some(flavor) = self.known_modules(py).get(&*key) { - match flavor { - KnownModuleFlavor::Frozen => { - let imp_module = self.imp_module(py); - - imp_module.call(py, "get_frozen_object", (fullname,), None) - }, - KnownModuleFlavor::InMemory { module_data } => { - match module_data.get_bytecode_memory_view(py) { - Some(value) => { - self.marshal_loads(py).call(py, (value,), None) - } - None => { - Err(PyErr::new::(py, ("cannot find code in memory", fullname))) - } - } - }, - KnownModuleFlavor::Builtin => { - Ok(py.None()) - } - } - } else { - Ok(py.None()) - } - } - - def get_source(&self, fullname: &PyString) -> PyResult { - let key = fullname.to_string(py)?; - - if let Some(flavor) = self.known_modules(py).get(&*key) { - if let KnownModuleFlavor::InMemory { module_data } = flavor { - match module_data.get_source_memory_view(py) { - Some(value) => { - self.decode_source(py).call(py, (value,), None) - }, - None => { - Err(PyErr::new::(py, ("source not available", fullname))) - } - } - } else { - Ok(py.None()) - } - } else { - Ok(py.None()) - } - } - - // End of importlib.abc.InspectLoader interface. - - // Support obtaining ResourceReader instances. - def get_resource_loader(&self, fullname: &PyString) -> PyResult { - let key = fullname.to_string(py)?; - - // This should not happen since code below should not be recursive into this - // function. - let mut resource_readers = match self.resource_readers(py).try_borrow_mut() { - Ok(v) => v, - Err(_) => { - return Err(PyErr::new::(py, "resource reader already borrowed")); - } - }; - - // Return an existing instance if we have one. - if let Some(reader) = resource_readers.get(&*key) { - return Ok(reader.clone_ref(py)); - } - - // Only create a reader if the name is a package. - if self.packages(py).contains(&*key) { - - // Not all packages have known resources. - let resources = match self.resources(py).get(&*key) { - Some(v) => v.clone(), - None => { - let h: Box> = Box::new(HashMap::new()); - Arc::new(h) - } - }; - - let reader = PyOxidizerResourceReader::create_instance(py, resources)?.into_object(); - resource_readers.insert(key.to_string(), reader.clone_ref(py)); - - Ok(reader) - } else { - Ok(py.None()) - } - } -}); - -#[allow(unused_doc_comments)] -/// Implements in-memory reading of resource data. -/// -/// Implements importlib.abc.ResourceReader. -py_class!(class PyOxidizerResourceReader |py| { - data resources: Arc>>; - - /// Returns an opened, file-like object for binary reading of the resource. - /// - /// If the resource cannot be found, FileNotFoundError is raised. - def open_resource(&self, resource: &PyString) -> PyResult { - let key = resource.to_string(py)?; - - if let Some(data) = self.resources(py).get(&*key) { - match get_memory_view(py, data) { - Some(mv) => { - let io_module = py.import("io")?; - let bytes_io = io_module.get(py, "BytesIO")?; - - bytes_io.call(py, (mv,), None) - } - None => Err(PyErr::fetch(py)) - } - } else { - Err(PyErr::new::(py, "resource not found")) - } - } - - /// Returns the file system path to the resource. - /// - /// If the resource does not concretely exist on the file system, raise - /// FileNotFoundError. - def resource_path(&self, _resource: &PyString) -> PyResult { - Err(PyErr::new::(py, "in-memory resources do not have filesystem paths")) - } - - /// Returns True if the named name is considered a resource. FileNotFoundError - /// is raised if name does not exist. - def is_resource(&self, name: &PyString) -> PyResult { - let key = name.to_string(py)?; - - if self.resources(py).contains_key(&*key) { - Ok(py.True().as_object().clone_ref(py)) - } else { - Err(PyErr::new::(py, "resource not found")) - } - } - - /// Returns an iterable of strings over the contents of the package. - /// - /// Do note that it is not required that all names returned by the iterator be actual resources, - /// e.g. it is acceptable to return names for which is_resource() would be false. - /// - /// Allowing non-resource names to be returned is to allow for situations where how a package - /// and its resources are stored are known a priori and the non-resource names would be useful. - /// For instance, returning subdirectory names is allowed so that when it is known that the - /// package and resources are stored on the file system then those subdirectory names can be - /// used directly. - def contents(&self) -> PyResult { - let resources = self.resources(py); - let mut names = Vec::with_capacity(resources.len()); - - for name in resources.keys() { - names.push(name.to_py_object(py)); - } - - let names_list = names.to_py_object(py); - - Ok(names_list.as_object().clone_ref(py)) - } -}); - -fn populate_packages(packages: &mut HashSet<&'static str>, name: &'static str) { - let mut search = name; - - while let Some(idx) = search.rfind('.') { - packages.insert(&search[0..idx]); - search = &search[0..idx]; - } -} - -const DOC: &[u8] = b"Binary representation of Python modules\0"; - -/// Represents global module state to be passed at interpreter initialization time. -#[derive(Debug)] -pub struct InitModuleState { - /// Whether to register the filesystem importer on sys.meta_path. - pub register_filesystem_importer: bool, - - /// Values to set on sys.path. - pub sys_paths: Vec, - - /// Raw data constituting Python module source code. - pub py_modules_data: &'static [u8], - - /// Raw data constituting Python resources data. - pub py_resources_data: &'static [u8], -} - -/// Holds reference to next module state struct. -/// -/// This module state will be copied into the module's state when the -/// Python module is initialized. -pub static mut NEXT_MODULE_STATE: *const InitModuleState = std::ptr::null(); - -/// Represents which importer to use for known modules. -#[derive(Debug)] -enum KnownModuleFlavor { - Builtin, - Frozen, - InMemory { module_data: PythonModuleData }, -} - -type KnownModules = HashMap<&'static str, KnownModuleFlavor>; - -/// State associated with each importer module instance. -/// -/// We write per-module state to per-module instances of this struct so -/// we don't rely on global variables and so multiple importer modules can -/// exist without issue. -#[derive(Debug)] -struct ModuleState { - /// Whether to register PathFinder on sys.meta_path. - register_filesystem_importer: bool, - - /// Values to set on sys.path. - sys_paths: Vec, - - /// Raw data constituting Python module source code. - py_modules_data: &'static [u8], - - /// Raw data constituting Python resources data. - py_resources_data: &'static [u8], - - /// Whether setup() has been called. - setup_called: bool, -} - -/// Obtain the module state for an instance of our importer module. -/// -/// Creates a Python exception on failure. -/// -/// Doesn't do type checking that the PyModule is of the appropriate type. -fn get_module_state<'a>(py: Python, m: &'a PyModule) -> Result<&'a mut ModuleState, PyErr> { - let ptr = m.as_object().as_ptr(); - let state = unsafe { pyffi::PyModule_GetState(ptr) as *mut ModuleState }; - - if state.is_null() { - let err = PyErr::new::(py, "unable to retrieve module state"); - return Err(err); - } - - Ok(unsafe { &mut *state }) -} - -/// Initialize the Python module object. -/// -/// This is called as part of the PyInit_* function to create the internal -/// module object for the interpreter. -/// -/// This receives a handle to the current Python interpreter and just-created -/// Python module instance. It populates the internal module state and registers -/// a _setup() on the module object for usage by Python. -/// -/// Because this function accesses NEXT_MODULE_STATE, it should only be -/// called during interpreter initialization. -fn module_init(py: Python, m: &PyModule) -> PyResult<()> { - let mut state = get_module_state(py, m)?; - - unsafe { - state.register_filesystem_importer = (*NEXT_MODULE_STATE).register_filesystem_importer; - // TODO we could move the value if we wanted to avoid the clone(). - state.sys_paths = (*NEXT_MODULE_STATE).sys_paths.clone(); - state.py_modules_data = (*NEXT_MODULE_STATE).py_modules_data; - state.py_resources_data = (*NEXT_MODULE_STATE).py_resources_data; - } - - state.setup_called = false; - - m.add( - py, - "_setup", - py_fn!( - py, - module_setup( - m: PyModule, - bootstrap_module: PyModule, - marshal_module: PyModule, - decode_source: PyObject - ) - ), - )?; - - Ok(()) -} - -/// Called after module import/initialization to configure the importing mechanism. -/// -/// This does the heavy work of configuring the importing mechanism. -/// -/// This function should only be called once as part of -/// _frozen_importlib_external._install_external_importers(). -fn module_setup( - py: Python, - m: PyModule, - bootstrap_module: PyModule, - marshal_module: PyModule, - decode_source: PyObject, -) -> PyResult { - let state = get_module_state(py, &m)?; - - if state.setup_called { - return Err(PyErr::new::( - py, - "PyOxidizer _setup() already called", - )); - } - - state.setup_called = true; - - let imp_module = bootstrap_module.get(py, "_imp")?; - let imp_module = imp_module.cast_into::(py)?; - let sys_module = bootstrap_module.get(py, "sys")?; - let sys_module = sys_module.cast_as::(py)?; - let meta_path_object = sys_module.get(py, "meta_path")?; - - // We should be executing as part of - // _frozen_importlib_external._install_external_importers(). - // _frozen_importlib._install() should have already been called and set up - // sys.meta_path with [BuiltinImporter, FrozenImporter]. Those should be the - // only meta path importers present. - - let meta_path = meta_path_object.cast_as::(py)?; - - if meta_path.len(py) != 2 { - return Err(PyErr::new::( - py, - "sys.meta_path does not contain 2 values", - )); - } - - let builtin_importer = meta_path.get_item(py, 0); - let frozen_importer = meta_path.get_item(py, 1); - - // It may seem inefficient to create a full HashMap of the parsed data instead of e.g. - // streaming it. But the overhead of iterators was measured to be more than building - // up a temporary HashMap. - let modules_data = match PythonModulesData::from(state.py_modules_data) { - Ok(v) => v, - Err(msg) => return Err(PyErr::new::(py, msg)), - }; - - // Populate our known module lookup table with entries from builtins, frozens, and - // finally us. Last write wins and has the same effect as registering our - // meta path importer first. This should be safe. If nothing else, it allows - // some builtins to be overwritten by .py implemented modules. - let mut known_modules = KnownModules::with_capacity(modules_data.data.len() + 10); - - for i in 0.. { - let record = unsafe { pyffi::PyImport_Inittab.offset(i) }; - - if unsafe { *record }.name.is_null() { - break; - } - - let name = unsafe { CStr::from_ptr((*record).name as _) }; - let name_str = match name.to_str() { - Ok(v) => v, - Err(_) => { - return Err(PyErr::new::( - py, - "unable to parse PyImport_Inittab", - )); - } - }; - - known_modules.insert(name_str, KnownModuleFlavor::Builtin); - } - - for i in 0.. { - let record = unsafe { pyffi::PyImport_FrozenModules.offset(i) }; - - if unsafe { *record }.name.is_null() { - break; - } - - let name = unsafe { CStr::from_ptr((*record).name as _) }; - let name_str = match name.to_str() { - Ok(v) => v, - Err(_) => { - return Err(PyErr::new::( - py, - "unable to parse PyImport_FrozenModules", - )); - } - }; - - known_modules.insert(name_str, KnownModuleFlavor::Frozen); - } - - // TODO consider baking set of packages into embedded data. - let mut packages: HashSet<&'static str> = HashSet::with_capacity(modules_data.data.len()); - - for (name, record) in modules_data.data { - known_modules.insert( - name, - KnownModuleFlavor::InMemory { - module_data: record, - }, - ); - populate_packages(&mut packages, name); - } - - let resources_data = match PythonResourcesData::from(state.py_resources_data) { - Ok(v) => v, - Err(msg) => return Err(PyErr::new::(py, msg)), - }; - - let marshal_loads = marshal_module.get(py, "loads")?; - let call_with_frames_removed = bootstrap_module.get(py, "_call_with_frames_removed")?; - let module_spec_type = bootstrap_module.get(py, "ModuleSpec")?; - - let builtins_module = - match unsafe { PyObject::from_borrowed_ptr_opt(py, pyffi::PyEval_GetBuiltins()) } { - Some(o) => o.cast_into::(py), - None => { - return Err(PyErr::new::( - py, - "unable to obtain __builtins__", - )); - } - }?; - - let exec_fn = match builtins_module.get_item(py, "exec") { - Some(v) => v, - None => { - return Err(PyErr::new::( - py, - "could not obtain __builtins__.exec", - )); - } - }; - - let resource_readers: RefCell>> = - RefCell::new(Box::new(HashMap::new())); - - let unified_importer = PyOxidizerFinder::create_instance( - py, - imp_module, - marshal_loads, - builtin_importer, - frozen_importer, - call_with_frames_removed, - module_spec_type, - decode_source, - exec_fn, - packages, - known_modules, - resources_data.packages, - resource_readers, - )?; - meta_path_object.call_method(py, "clear", NoArgs, None)?; - meta_path_object.call_method(py, "append", (unified_importer,), None)?; - - // At this point the importing mechanism is fully initialized to use our - // unified importer, which handles built-in, frozen, and in-memory imports. - - // Because we're probably running during Py_Initialize() and stdlib modules - // may not be in-memory, we need to register and configure additional importers - // here, before continuing with Py_Initialize(), otherwise we may not find - // the standard library! - - if state.register_filesystem_importer { - // This is what importlib._bootstrap_external usually does: - // supported_loaders = _get_supported_file_loaders() - // sys.path_hooks.extend([FileFinder.path_hook(*supported_loaders)]) - // sys.meta_path.append(PathFinder) - let frozen_importlib_external = py.import("_frozen_importlib_external")?; - - let loaders = - frozen_importlib_external.call(py, "_get_supported_file_loaders", NoArgs, None)?; - let loaders_list = loaders.cast_as::(py)?; - let loaders_vec: Vec = loaders_list.iter(py).collect(); - let loaders_tuple = PyTuple::new(py, loaders_vec.as_slice()); - - let file_finder = frozen_importlib_external.get(py, "FileFinder")?; - let path_hook = file_finder.call_method(py, "path_hook", loaders_tuple, None)?; - let path_hooks = sys_module.get(py, "path_hooks")?; - path_hooks.call_method(py, "append", (path_hook,), None)?; - - let path_finder = frozen_importlib_external.get(py, "PathFinder")?; - let meta_path = sys_module.get(py, "meta_path")?; - meta_path.call_method(py, "append", (path_finder,), None)?; - } - - // Ideally we should be calling Py_SetPath() before Py_Initialize() to set sys.path. - // But we tried to do this and only ran into problems due to string conversions, - // unwanted side-effects. Updating sys.path directly before it is used by PathFinder - // (which was just registered above) should have the same effect. - - // Always clear out sys.path. - let sys_path = sys_module.get(py, "path")?; - sys_path.call_method(py, "clear", NoArgs, None)?; - - // And repopulate it with entries from the config. - for path in &state.sys_paths { - let py_path = PyString::new(py, path.as_str()); - - sys_path.call_method(py, "append", (py_path,), None)?; - } - - Ok(py.None()) -} - -static mut MODULE_DEF: pyffi::PyModuleDef = pyffi::PyModuleDef { - m_base: pyffi::PyModuleDef_HEAD_INIT, - m_name: std::ptr::null(), - m_doc: std::ptr::null(), - m_size: std::mem::size_of::() as isize, - m_methods: 0 as *mut _, - m_slots: 0 as *mut _, - m_traverse: None, - m_clear: None, - m_free: None, -}; - -/// Module initialization function. -/// -/// This creates the Python module object. -/// -/// We don't use the macros in the cpython crate because they are somewhat -/// opinionated about how things should work. e.g. they call -/// PyEval_InitThreads(), which is undesired. We want total control. -#[allow(non_snake_case)] -pub extern "C" fn PyInit__pyoxidizer_importer() -> *mut pyffi::PyObject { - let py = unsafe { cpython::Python::assume_gil_acquired() }; - - // TRACKING RUST1.32 We can't call as_ptr() in const fn in Rust 1.31. - unsafe { - if MODULE_DEF.m_name.is_null() { - MODULE_DEF.m_name = PYOXIDIZER_IMPORTER_NAME.as_ptr() as *const _; - MODULE_DEF.m_doc = DOC.as_ptr() as *const _; - } - } - - let module = unsafe { pyffi::PyModule_Create(&mut MODULE_DEF) }; - - if module.is_null() { - return module; - } - - let module = match unsafe { PyObject::from_owned_ptr(py, module).cast_into::(py) } { - Ok(m) => m, - Err(e) => { - PyErr::from(e).restore(py); - return std::ptr::null_mut(); - } - }; - - match module_init(py, &module) { - Ok(()) => module.into_object().steal_ptr(), - Err(e) => { - e.restore(py); - std::ptr::null_mut() - } - } -} diff --git a/pyembed/src/lib.rs b/pyembed/src/lib.rs deleted file mode 100644 index 791eb72..0000000 --- a/pyembed/src/lib.rs +++ /dev/null @@ -1,34 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -/*! -Manage an embedded Python interpreter. - -The `pyembed` crate contains functionality for managing a Python interpreter -embedded in the current binary. This crate is typically used along with -[PyOxidizer](https://github.com/indygreg/PyOxidizer) for producing -self-contained binaries containing Python. - -The most important types are [`PythonConfig`](struct.PythonConfig.html) and -[`MainPythonInterpreter`](struct.MainPythonInterpreter.html). A `PythonConfig` -defines how a Python interpreter is to behave. A `MainPythonInterpreter` -creates and manages that interpreter and serves as a high-level interface for -running code in the interpreter. -*/ - -mod config; -mod data; -mod importer; -mod pyalloc; -mod pyinterp; -mod pystr; - -#[allow(unused_imports)] -pub use crate::config::PythonConfig; - -#[allow(unused_imports)] -pub use crate::data::default_python_config; - -#[allow(unused_imports)] -pub use crate::pyinterp::MainPythonInterpreter; diff --git a/pyembed/src/pyalloc.rs b/pyembed/src/pyalloc.rs deleted file mode 100644 index 60912f7..0000000 --- a/pyembed/src/pyalloc.rs +++ /dev/null @@ -1,221 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Custom Python memory allocators. - -#[cfg(feature = "jemalloc-sys")] -use jemalloc_sys as jemallocffi; -use libc::{c_void, size_t}; -use python3_sys as pyffi; -use std::alloc; -use std::collections::HashMap; -#[cfg(feature = "jemalloc-sys")] -use std::ptr::null_mut; - -const MIN_ALIGN: usize = 16; - -type RawAllocatorState = HashMap<*mut u8, alloc::Layout>; - -/// Holds state for the raw memory allocator. -/// -/// Ideally we wouldn't need to track state. But Rust's dealloc() API -/// requires passing in a Layout that matches the allocation. This means -/// we need to track the Layout for each allocation. This data structure -/// facilitates that. -/// -/// TODO HashMap isn't thread safe and the Python raw allocator doesn't -/// hold the GIL. So we need a thread safe map or a mutex guarding access. -pub struct RawAllocator { - pub allocator: pyffi::PyMemAllocatorEx, - _state: Box, -} - -extern "C" fn raw_rust_malloc(ctx: *mut c_void, size: size_t) -> *mut c_void { - // PyMem_RawMalloc()'s docs say: Requesting zero bytes returns a distinct - // non-NULL pointer if possible, as if PyMem_RawMalloc(1) had been called - // instead. - let size = match size { - 0 => 1, - val => val, - }; - - unsafe { - let state = ctx as *mut RawAllocatorState; - let layout = alloc::Layout::from_size_align_unchecked(size, MIN_ALIGN); - let res = alloc::alloc(layout); - - (*state).insert(res, layout); - - //println!("allocated {} bytes to {:?}", size, res); - res as *mut c_void - } -} - -extern "C" fn raw_rust_calloc(ctx: *mut c_void, nelem: size_t, elsize: size_t) -> *mut c_void { - // PyMem_RawCalloc()'s docs say: Requesting zero elements or elements of - // size zero bytes returns a distinct non-NULL pointer if possible, as if - // PyMem_RawCalloc(1, 1) had been called instead. - let size = match nelem * elsize { - 0 => 1, - val => val, - }; - - unsafe { - let state = ctx as *mut RawAllocatorState; - let layout = alloc::Layout::from_size_align_unchecked(size, MIN_ALIGN); - let res = alloc::alloc_zeroed(layout); - - (*state).insert(res, layout); - - //println!("zero allocated {} bytes to {:?}", size, res); - - res as *mut c_void - } -} - -extern "C" fn raw_rust_realloc( - ctx: *mut c_void, - ptr: *mut c_void, - new_size: size_t, -) -> *mut c_void { - //println!("reallocating {:?} to {} bytes", ptr as *mut u8, new_size); - - // PyMem_RawRealloc()'s docs say: If p is NULL, the call is equivalent to - // PyMem_RawMalloc(n); else if n is equal to zero, the memory block is - // resized but is not freed, and the returned pointer is non-NULL. - if ptr.is_null() { - return raw_rust_malloc(ctx, new_size); - } - - let new_size = match new_size { - 0 => 1, - val => val, - }; - - unsafe { - let state = ctx as *mut RawAllocatorState; - let layout = alloc::Layout::from_size_align_unchecked(new_size, MIN_ALIGN); - - let key = ptr as *mut u8; - let old_layout = (*state) - .remove(&key) - .expect("original memory address not tracked"); - - let res = alloc::realloc(ptr as *mut u8, old_layout, new_size); - - (*state).insert(res, layout); - - res as *mut c_void - } -} - -extern "C" fn raw_rust_free(ctx: *mut c_void, ptr: *mut c_void) { - if ptr.is_null() { - return; - } - - //println!("freeing {:?}", ptr as *mut u8); - unsafe { - let state = ctx as *mut RawAllocatorState; - - let key = ptr as *mut u8; - let layout = (*state) - .get(&key) - .expect(format!("could not find allocated memory record: {:?}", key).as_str()); - - alloc::dealloc(key, *layout); - (*state).remove(&key); - } -} - -pub fn make_raw_rust_memory_allocator() -> RawAllocator { - // We need to allocate the HashMap on the heap so the pointer doesn't refer - // to the stack. We rebox and add the Box to our struct so lifetimes are - // managed. - let alloc = Box::new(HashMap::<*mut u8, alloc::Layout>::new()); - let state = Box::into_raw(alloc); - - let allocator = pyffi::PyMemAllocatorEx { - ctx: state as *mut c_void, - malloc: Some(raw_rust_malloc), - calloc: Some(raw_rust_calloc), - realloc: Some(raw_rust_realloc), - free: Some(raw_rust_free), - }; - - RawAllocator { - allocator, - _state: unsafe { Box::from_raw(state) }, - } -} - -// Now let's define a raw memory allocator that interfaces directly with jemalloc. -// This avoids the overhead of going through Rust's allocation layer. - -#[cfg(feature = "jemalloc-sys")] -extern "C" fn raw_jemalloc_malloc(_ctx: *mut c_void, size: size_t) -> *mut c_void { - // PyMem_RawMalloc()'s docs say: Requesting zero bytes returns a distinct - // non-NULL pointer if possible, as if PyMem_RawMalloc(1) had been called - // instead. - let size = match size { - 0 => 1, - val => val, - }; - - unsafe { jemallocffi::mallocx(size, 0) } -} - -#[cfg(feature = "jemalloc-sys")] -extern "C" fn raw_jemalloc_calloc(_ctx: *mut c_void, nelem: size_t, elsize: size_t) -> *mut c_void { - // PyMem_RawCalloc()'s docs say: Requesting zero elements or elements of - // size zero bytes returns a distinct non-NULL pointer if possible, as if - // PyMem_RawCalloc(1, 1) had been called instead. - let size = match nelem * elsize { - 0 => 1, - val => val, - }; - - unsafe { jemallocffi::mallocx(size, jemallocffi::MALLOCX_ZERO) } -} - -#[cfg(feature = "jemalloc-sys")] -extern "C" fn raw_jemalloc_realloc( - ctx: *mut c_void, - ptr: *mut c_void, - new_size: size_t, -) -> *mut c_void { - // PyMem_RawRealloc()'s docs say: If p is NULL, the call is equivalent to - // PyMem_RawMalloc(n); else if n is equal to zero, the memory block is - // resized but is not freed, and the returned pointer is non-NULL. - if ptr.is_null() { - return raw_jemalloc_malloc(ctx, new_size); - } - - let new_size = match new_size { - 0 => 1, - val => val, - }; - - unsafe { jemallocffi::rallocx(ptr, new_size, 0) } -} - -#[cfg(feature = "jemalloc-sys")] -extern "C" fn raw_jemalloc_free(_ctx: *mut c_void, ptr: *mut c_void) { - if ptr.is_null() { - return; - } - - unsafe { jemallocffi::dallocx(ptr, 0) } -} - -#[cfg(feature = "jemalloc-sys")] -pub fn make_raw_jemalloc_allocator() -> pyffi::PyMemAllocatorEx { - pyffi::PyMemAllocatorEx { - ctx: null_mut(), - malloc: Some(raw_jemalloc_malloc), - calloc: Some(raw_jemalloc_calloc), - realloc: Some(raw_jemalloc_realloc), - free: Some(raw_jemalloc_free), - } -} diff --git a/pyembed/src/pyinterp.rs b/pyembed/src/pyinterp.rs deleted file mode 100644 index 898b5bb..0000000 --- a/pyembed/src/pyinterp.rs +++ /dev/null @@ -1,776 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Manage an embedded Python interpreter. - -use libc::c_char; -use python3_sys as pyffi; -use std::collections::BTreeSet; -use std::env; -use std::ffi::CString; -use std::fs; -use std::io::Write; -use std::path::PathBuf; -use std::ptr::null; - -use cpython::exc::ValueError; -use cpython::{ - GILGuard, NoArgs, ObjectProtocol, PyClone, PyDict, PyErr, PyList, PyModule, PyObject, PyResult, - Python, PythonObject, ToPyObject, -}; - -use super::config::{PythonConfig, PythonRawAllocator, PythonRunMode}; -use super::importer::PyInit__pyoxidizer_importer; -#[cfg(feature = "jemalloc-sys")] -use super::pyalloc::make_raw_jemalloc_allocator; -use super::pyalloc::{make_raw_rust_memory_allocator, RawAllocator}; -use super::pystr::{osstring_to_bytes, osstring_to_str, OwnedPyStr}; - -pub const PYOXIDIZER_IMPORTER_NAME: &[u8] = b"_pyoxidizer_importer\0"; - -const FROZEN_IMPORTLIB_NAME: &[u8] = b"_frozen_importlib\0"; -const FROZEN_IMPORTLIB_EXTERNAL_NAME: &[u8] = b"_frozen_importlib_external\0"; - -/// Represents the results of executing Python code with exception handling. -#[derive(Debug)] -pub enum PythonRunResult { - /// Code executed without raising an exception. - Ok {}, - /// Code executed and raised an exception. - Err {}, - /// Code executed and raised SystemExit with the specified exit code. - Exit { code: i32 }, -} - -fn make_custom_frozen_modules(config: &PythonConfig) -> [pyffi::_frozen; 3] { - [ - pyffi::_frozen { - name: FROZEN_IMPORTLIB_NAME.as_ptr() as *const i8, - code: config.frozen_importlib_data.as_ptr(), - size: config.frozen_importlib_data.len() as i32, - }, - pyffi::_frozen { - name: FROZEN_IMPORTLIB_EXTERNAL_NAME.as_ptr() as *const i8, - code: config.frozen_importlib_external_data.as_ptr(), - size: config.frozen_importlib_external_data.len() as i32, - }, - pyffi::_frozen { - name: null(), - code: null(), - size: 0, - }, - ] -} - -#[cfg(windows)] -extern "C" { - pub fn __acrt_iob_func(x: u32) -> *mut libc::FILE; -} - -#[cfg(windows)] -fn stdin_to_file() -> *mut libc::FILE { - // The stdin symbol is made available by importing . On Windows, - // stdin is defined in corecrt_wstdio.h as a `#define` that calls this - // internal CRT function. There's no exported symbol to use. So we - // emulate the behavior of the C code. - // - // Relying on an internal CRT symbol is probably wrong. But Microsoft - // typically keeps backwards compatibility for undocumented functions - // like this because people use them in the wild. - // - // An attempt was made to use fdopen(0) like we do on POSIX. However, - // this causes a crash. The Microsoft C Runtime is already bending over - // backwards to coerce its native HANDLEs into POSIX file descriptors. - // Even if there are other ways to coerce a FILE* from a HANDLE - // (_open_osfhandle() + _fdopen() might work), using the same function - // that uses to obtain a FILE* seems like the least risky thing - // to do. - unsafe { __acrt_iob_func(0) } -} - -#[cfg(unix)] -fn stdin_to_file() -> *mut libc::FILE { - unsafe { libc::fdopen(libc::STDIN_FILENO, &('r' as libc::c_char)) } -} - -#[cfg(windows)] -fn stderr_to_file() -> *mut libc::FILE { - unsafe { __acrt_iob_func(2) } -} - -#[cfg(unix)] -fn stderr_to_file() -> *mut libc::FILE { - unsafe { libc::fdopen(libc::STDERR_FILENO, &('w' as libc::c_char)) } -} - -#[cfg(feature = "jemalloc-sys")] -fn raw_jemallocator() -> pyffi::PyMemAllocatorEx { - make_raw_jemalloc_allocator() -} - -#[cfg(not(feature = "jemalloc-sys"))] -fn raw_jemallocator() -> pyffi::PyMemAllocatorEx { - panic!("jemalloc is not available in this build configuration"); -} - -/// Manages an embedded Python interpreter. -/// -/// **Warning: Python interpreters have global state. There should only be a -/// single instance of this type per process.** -/// -/// Instances must only be constructed through [`MainPythonInterpreter::new()`](#method.new). -/// -/// This type and its various functionality is a glorified wrapper around the -/// Python C API. But there's a lot of added functionality on top of what the C -/// API provides. -/// -/// Both the low-level `python3-sys` and higher-level `cpython` crates are used. -pub struct MainPythonInterpreter<'a> { - pub config: PythonConfig, - frozen_modules: [pyffi::_frozen; 3], - init_run: bool, - raw_allocator: Option, - raw_rust_allocator: Option, - gil: Option, - py: Option>, - program_name: Option, -} - -impl<'a> MainPythonInterpreter<'a> { - /// Construct a Python interpreter from a configuration. - /// - /// The Python interpreter is initialized as a side-effect. The GIL is held. - pub fn new(config: PythonConfig) -> Result, &'static str> { - let (raw_allocator, raw_rust_allocator) = match config.raw_allocator { - PythonRawAllocator::Jemalloc => (Some(raw_jemallocator()), None), - PythonRawAllocator::Rust => (None, Some(make_raw_rust_memory_allocator())), - PythonRawAllocator::System => (None, None), - }; - - let frozen_modules = make_custom_frozen_modules(&config); - - let mut res = MainPythonInterpreter { - config, - frozen_modules, - init_run: false, - raw_allocator, - raw_rust_allocator, - gil: None, - py: None, - program_name: None, - }; - - res.init()?; - - Ok(res) - } - - /// Initialize the interpreter. - /// - /// This mutates global state in the Python interpreter according to the - /// bound config and initializes the Python interpreter. - /// - /// After this is called, the embedded Python interpreter is ready to - /// execute custom code. - /// - /// If called more than once, the function is a no-op from the perspective - /// of interpreter initialization. - /// - /// Returns a Python instance which has the GIL acquired. - fn init(&mut self) -> Result { - if self.init_run { - return Ok(self.acquire_gil()); - } - - let config = &self.config; - - let exe = env::current_exe().or_else(|_| Err("could not obtain current exe"))?; - let origin = exe - .parent() - .ok_or_else(|| "unable to get exe parent")? - .display() - .to_string(); - - let sys_paths: Vec = config - .sys_paths - .iter() - .map(|path| path.replace("$ORIGIN", &origin)) - .collect(); - - // TODO should we call PyMem::SetupDebugHooks() if enabled? - if let Some(raw_allocator) = &self.raw_allocator { - unsafe { - let ptr = raw_allocator as *const _; - pyffi::PyMem_SetAllocator( - pyffi::PyMemAllocatorDomain::PYMEM_DOMAIN_RAW, - ptr as *mut _, - ); - } - } else if let Some(raw_rust_allocator) = &self.raw_rust_allocator { - unsafe { - let ptr = &raw_rust_allocator.allocator as *const _; - pyffi::PyMem_SetAllocator( - pyffi::PyMemAllocatorDomain::PYMEM_DOMAIN_RAW, - ptr as *mut _, - ); - } - } - - // Module state is a bit wonky. - // - // Our in-memory importer relies on a special module which holds references - // to Python objects exposing module/resource data. This module is imported as - // part of initializing the Python interpreter. - // - // This Python module object needs to hold references to the raw Python module - // and resource data. Those references are defined by the InitModuleState struct. - // - // Unfortunately, we can't easily associate state with the interpreter before - // calling Py_Initialize(). And the module initialization function receives no - // arguments. Our solution is to update a global pointer to point at "our" state - // then call Py_Initialize(). The module will be initialized as part of calling - // Py_Initialize(). It will copy the contents at the pointer into the local - // module state and the global pointer will be unused after that. The end result - // is that we have no reliance on global variables outside of a short window - // between now and when Py_Initialize() is called. - // - // We could potentially do away with this global variable by using a closure for - // the initialization function. But this rabbit hole may involve gross hackery - // like dynamic module names. It probably isn't worth it. - - // It is important for references in this struct to have a lifetime of at least - // that of the interpreter. - // TODO specify lifetimes so the compiler validates this for us. - let module_state = super::importer::InitModuleState { - register_filesystem_importer: self.config.filesystem_importer, - sys_paths, - py_modules_data: config.py_modules_data, - py_resources_data: config.py_resources_data, - }; - - if config.use_custom_importlib { - // Replace the frozen modules in the interpreter with our custom set - // that knows how to import from memory. - unsafe { - pyffi::PyImport_FrozenModules = self.frozen_modules.as_ptr(); - } - - // Register our _pyoxidizer_importer extension which provides importing functionality. - unsafe { - // name char* needs to live as long as the interpreter is active. - pyffi::PyImport_AppendInittab( - PYOXIDIZER_IMPORTER_NAME.as_ptr() as *const i8, - Some(PyInit__pyoxidizer_importer), - ); - - // Move pointer to our stack allocated instance. This pointer will be - // accessed when creating the Python module object, which should be - // done automatically as part of low-level interpreter initialization - // when calling Py_Initialize() below. - super::importer::NEXT_MODULE_STATE = &module_state; - } - } - - let home = - OwnedPyStr::from_str(exe.to_str().ok_or_else(|| "unable to convert exe to str")?)?; - - unsafe { - // Pointer needs to live for lifetime of interpreter. - pyffi::Py_SetPythonHome(home.as_wchar_ptr()); - } - - let program_name = OwnedPyStr::from_str(config.program_name.as_str())?; - - unsafe { - pyffi::Py_SetProgramName(program_name.as_wchar_ptr()); - } - - // Value needs to live for lifetime of interpreter. - self.program_name = Some(program_name); - - // If we don't call Py_SetPath(), Python has its own logic for initializing it. - // We set it to an empty string because we don't want any paths by default. If - // we do have defined paths, they will be set after Py_Initialize(). - unsafe { - // Value is copied internally. So short lifetime is OK. - let value = OwnedPyStr::from_str("")?; - pyffi::Py_SetPath(value.as_wchar_ptr()); - } - - if let (Some(ref encoding), Some(ref errors)) = - (&config.standard_io_encoding, &config.standard_io_errors) - { - let cencoding = CString::new(encoding.clone()) - .or_else(|_| Err("unable to convert encoding to C string"))?; - let cerrors = CString::new(errors.clone()) - .or_else(|_| Err("unable to convert encoding error mode to C string"))?; - - let res = unsafe { - pyffi::Py_SetStandardStreamEncoding( - cencoding.as_ptr() as *const i8, - cerrors.as_ptr() as *const i8, - ) - }; - - if res != 0 { - return Err("unable to set standard stream encoding"); - } - } - - unsafe { - pyffi::Py_DontWriteBytecodeFlag = if config.dont_write_bytecode { 1 } else { 0 }; - pyffi::Py_IgnoreEnvironmentFlag = if config.ignore_python_env { 1 } else { 0 }; - pyffi::Py_NoSiteFlag = if config.import_site { 0 } else { 1 }; - pyffi::Py_NoUserSiteDirectory = if config.import_user_site { 0 } else { 1 }; - pyffi::Py_OptimizeFlag = config.opt_level; - pyffi::Py_UnbufferedStdioFlag = if config.unbuffered_stdio { 1 } else { 0 }; - } - - /* Pre-initialization functions we could support: - * - * PyObject_SetArenaAllocator() - * PySys_AddWarnOption() - * PySys_AddXOption() - * PySys_ResetWarnOptions() - */ - - unsafe { - pyffi::Py_Initialize(); - } - - // We shouldn't be accessing this pointer after Py_Initialize(). And the - // memory is stack allocated and doesn't outlive this frame. We don't want - // to leave a stack pointer sitting around! - unsafe { - super::importer::NEXT_MODULE_STATE = std::ptr::null(); - } - - let py = unsafe { Python::assume_gil_acquired() }; - self.py = Some(py); - self.init_run = true; - - // env::args() panics if arguments aren't valid Unicode. But invalid - // Unicode arguments are possible and some applications may want to - // support them. - // - // env::args_os() provides access to the raw OsString instances, which - // will be derived from wchar_t on Windows and char* on POSIX. We can - // convert these to Python str instances using a platform-specific - // mechanism. - let args_objs = env::args_os() - .map(|os_arg| osstring_to_str(py, os_arg)) - .collect::, &'static str>>()?; - - // This will steal the pointer to the elements and mem::forget them. - let args = PyList::new(py, &args_objs); - let argv = b"argv\0"; - - let res = args.with_borrowed_ptr(py, |args_ptr| unsafe { - pyffi::PySys_SetObject(argv.as_ptr() as *const i8, args_ptr) - }); - - match res { - 0 => (), - _ => return Err("unable to set sys.argv"), - } - - if config.argvb { - let args_objs: Vec = env::args_os() - .map(|os_arg| osstring_to_bytes(py, os_arg)) - .collect(); - - let args = PyList::new(py, &args_objs); - let argvb = b"argvb\0"; - - let res = args.with_borrowed_ptr(py, |args_ptr| unsafe { - pyffi::PySys_SetObject(argvb.as_ptr() as *const i8, args_ptr) - }); - - match res { - 0 => (), - _ => return Err("unable to set sys.argvb"), - } - } - - // As a convention, sys.oxidized is set to indicate we are running from - // a self-contained application. - let oxidized = b"oxidized\0"; - - let res = py.True().with_borrowed_ptr(py, |py_true| unsafe { - pyffi::PySys_SetObject(oxidized.as_ptr() as *const i8, py_true) - }); - - match res { - 0 => (), - _ => return Err("unable to set sys.oxidized"), - } - - Ok(py) - } - - /// Ensure the Python GIL is released. - pub fn release_gil(&mut self) { - if self.py.is_some() { - self.py = None; - self.gil = None; - } - } - - /// Ensure the Python GIL is acquired, returning a handle on the interpreter. - pub fn acquire_gil(&mut self) -> Python<'a> { - match self.py { - Some(py) => py, - None => { - let gil = GILGuard::acquire(); - let py = unsafe { Python::assume_gil_acquired() }; - - self.gil = Some(gil); - self.py = Some(py); - - py - } - } - } - - /// Runs the interpreter with the default code execution settings. - /// - /// The crate was built with settings that configure what should be - /// executed by default. Those settings will be loaded and executed. - pub fn run(&mut self) -> PyResult { - // clone() to avoid issues mixing mutable and immutable borrows of self. - let run = self.config.run.clone(); - - let py = self.acquire_gil(); - - match run { - PythonRunMode::None => Ok(py.None()), - PythonRunMode::Repl => self.run_repl(), - PythonRunMode::Module { module } => self.run_module_as_main(&module), - PythonRunMode::Eval { code } => self.run_code(&code), - } - } - - /// Handle a raised SystemExit exception. - /// - /// This emulates the behavior in pythonrun.c:handle_system_exit() and - /// _Py_HandleSystemExit() but without the call to exit(), which we don't want. - fn handle_system_exit(&mut self, py: Python, err: PyErr) -> Result { - std::io::stdout() - .flush() - .or_else(|_| Err("failed to flush stdout"))?; - - let mut value = match err.pvalue { - Some(ref instance) => { - if instance.as_ptr() == py.None().as_ptr() { - return Ok(0); - } - - instance.clone_ref(py) - } - None => { - return Ok(0); - } - }; - - if unsafe { pyffi::PyExceptionInstance_Check(value.as_ptr()) } != 0 { - // The error code should be in the "code" attribute. - if let Ok(code) = value.getattr(py, "code") { - if code == py.None() { - return Ok(0); - } - - // Else pretend exc_value.code is the new exception value to use - // and fall through to below. - value = code; - } - } - - if unsafe { pyffi::PyLong_Check(value.as_ptr()) } != 0 { - return Ok(unsafe { pyffi::PyLong_AsLong(value.as_ptr()) as i32 }); - } - - let sys_module = py - .import("sys") - .or_else(|_| Err("unable to obtain sys module"))?; - let stderr = sys_module.get(py, "stderr"); - - // This is a cargo cult from the canonical implementation. - unsafe { pyffi::PyErr_Clear() } - - match stderr { - Ok(o) => unsafe { - pyffi::PyFile_WriteObject(value.as_ptr(), o.as_ptr(), pyffi::Py_PRINT_RAW); - }, - Err(_) => { - unsafe { - pyffi::PyObject_Print(value.as_ptr(), stderr_to_file(), pyffi::Py_PRINT_RAW); - } - std::io::stderr() - .flush() - .or_else(|_| Err("failure to flush stderr"))?; - } - } - - unsafe { - pyffi::PySys_WriteStderr(b"\n\0".as_ptr() as *const i8); - } - - // This frees references to this exception, which may be necessary to avoid - // badness. - err.restore(py); - unsafe { - pyffi::PyErr_Clear(); - } - - Ok(1) - } - - /// Runs the interpreter and handles any exception that was raised. - pub fn run_and_handle_error(&mut self) -> PythonRunResult { - // There are underdefined lifetime bugs at play here. There is no - // explicit lifetime for the PyObject's returned. If we don't have - // the local variable in scope, we can get into a situation where - // drop() on self is called before the PyObject's drop(). This is - // problematic because PyObject's drop() attempts to acquire the GIL. - // If the interpreter is shut down, there is no GIL to acquire, and - // we may segfault. - // TODO look into setting lifetimes properly so the compiler can - // prevent some issues. - let res = self.run(); - let py = self.acquire_gil(); - - match res { - Ok(_) => PythonRunResult::Ok {}, - Err(err) => { - // SystemExit is special in that PyErr_PrintEx() will call - // exit() if it is seen. So, we handle it manually so we can - // return an exit code instead of exiting. - - // TODO surely the cpython crate offers a better way to do this... - err.restore(py); - let matches = - unsafe { pyffi::PyErr_ExceptionMatches(pyffi::PyExc_SystemExit) } != 0; - let err = cpython::PyErr::fetch(py); - - if matches { - return PythonRunResult::Exit { - code: match self.handle_system_exit(py, err) { - Ok(code) => code, - Err(msg) => { - eprintln!("{}", msg); - 1 - } - }, - }; - } - - self.print_err(err); - - PythonRunResult::Err {} - } - } - } - - /// Calls run() and resolves a suitable exit code. - pub fn run_as_main(&mut self) -> i32 { - match self.run_and_handle_error() { - PythonRunResult::Ok {} => 0, - PythonRunResult::Err {} => 1, - PythonRunResult::Exit { code } => code, - } - } - - /// Runs a Python module as the __main__ module. - /// - /// Returns the execution result of the module code. - /// - /// The interpreter is automatically initialized if needed. - pub fn run_module_as_main(&mut self, name: &str) -> PyResult { - let py = self.acquire_gil(); - - // This is modeled after runpy.py:_run_module_as_main(). - let main: PyModule = unsafe { - PyObject::from_borrowed_ptr( - py, - pyffi::PyImport_AddModule("__main__\0".as_ptr() as *const c_char), - ) - .cast_into(py)? - }; - - let main_dict = main.dict(py); - - let importlib_util = py.import("importlib.util")?; - let spec = importlib_util.call(py, "find_spec", (name,), None)?; - let loader = spec.getattr(py, "loader")?; - let code = loader.call_method(py, "get_code", (name,), None)?; - - let origin = spec.getattr(py, "origin")?; - let cached = spec.getattr(py, "cached")?; - - // TODO handle __package__. - main_dict.set_item(py, "__name__", "__main__")?; - main_dict.set_item(py, "__file__", origin)?; - main_dict.set_item(py, "__cached__", cached)?; - main_dict.set_item(py, "__doc__", py.None())?; - main_dict.set_item(py, "__loader__", loader)?; - main_dict.set_item(py, "__spec__", spec)?; - - unsafe { - let globals = main_dict.as_object().as_ptr(); - let res = pyffi::PyEval_EvalCode(code.as_ptr(), globals, globals); - - if res.is_null() { - let err = PyErr::fetch(py); - err.print(py); - Err(PyErr::fetch(py)) - } else { - Ok(PyObject::from_owned_ptr(py, res)) - } - } - } - - /// Start and run a Python REPL. - /// - /// This emulates what CPython's main.c does. - /// - /// The interpreter is automatically initialized if needed. - pub fn run_repl(&mut self) -> PyResult { - let py = self.acquire_gil(); - - unsafe { - pyffi::Py_InspectFlag = 0; - } - - // readline is optional. We don't care if it fails. - if py.import("readline").is_ok() {} - - let sys = py.import("sys")?; - - if let Ok(hook) = sys.get(py, "__interactivehook__") { - hook.call(py, NoArgs, None)?; - } - - let stdin_filename = ""; - let filename = CString::new(stdin_filename) - .or_else(|_| Err(PyErr::new::(py, "could not create CString")))?; - let mut cf = pyffi::PyCompilerFlags { cf_flags: 0 }; - - // TODO use return value. - unsafe { - let stdin = stdin_to_file(); - pyffi::PyRun_AnyFileExFlags(stdin, filename.as_ptr() as *const c_char, 0, &mut cf) - }; - - Ok(py.None()) - } - - /// Runs Python code provided by a string. - /// - /// This is similar to what ``python -c `` would do. - /// - /// The interpreter is automatically initialized if needed. - pub fn run_code(&mut self, code: &str) -> PyResult { - let py = self.acquire_gil(); - - let code = CString::new(code).or_else(|_| { - Err(PyErr::new::( - py, - "source code is not a valid C string", - )) - })?; - - unsafe { - let main = pyffi::PyImport_AddModule("__main__\0".as_ptr() as *const _); - - if main.is_null() { - return Err(PyErr::fetch(py)); - } - - let main_dict = pyffi::PyModule_GetDict(main); - - let res = pyffi::PyRun_StringFlags( - code.as_ptr() as *const _, - pyffi::Py_file_input, - main_dict, - main_dict, - std::ptr::null_mut(), - ); - - if res.is_null() { - Err(PyErr::fetch(py)) - } else { - Ok(PyObject::from_owned_ptr(py, res)) - } - } - } - - /// Print a Python error. - /// - /// Under the hood this calls ``PyErr_PrintEx()``, which may call - /// ``Py_Exit()`` and may write to stderr. - pub fn print_err(&mut self, err: PyErr) { - let py = self.acquire_gil(); - err.print(py); - } -} - -/// Write loaded Python modules to a directory. -/// -/// Given a Python interpreter and a path to a directory, this will create a -/// file in that directory named ``modules-`` and write a ``\n`` delimited -/// list of loaded names from ``sys.modules`` into that file. -fn write_modules_to_directory(py: Python, path: &PathBuf) -> Result<(), &'static str> { - // TODO this needs better error handling all over. - - fs::create_dir_all(path).or_else(|_| Err("could not create directory for modules"))?; - - let rand = uuid::Uuid::new_v4(); - - let path = path.join(format!("modules-{}", rand.to_string())); - - let sys = py - .import("sys") - .or_else(|_| Err("could not obtain sys module"))?; - let modules = sys - .get(py, "modules") - .or_else(|_| Err("could not obtain sys.modules"))?; - - let modules = modules - .cast_as::(py) - .or_else(|_| Err("sys.modules is not a dict"))?; - - let mut names = BTreeSet::new(); - for (key, _value) in modules.items(py) { - names.insert( - key.extract::(py) - .or_else(|_| Err("module name is not a str"))?, - ); - } - - let mut f = fs::File::create(path).or_else(|_| Err("could not open file for writing"))?; - - for name in names { - f.write_fmt(format_args!("{}\n", name)) - .or_else(|_| Err("could not write"))?; - } - - Ok(()) -} - -impl<'a> Drop for MainPythonInterpreter<'a> { - fn drop(&mut self) { - if let Some(key) = &self.config.write_modules_directory_env { - if let Ok(path) = env::var(key) { - let path = PathBuf::from(path); - let py = self.acquire_gil(); - - if let Err(msg) = write_modules_to_directory(py, &path) { - eprintln!("error writing modules file: {}", msg); - } - } - } - - let _ = unsafe { pyffi::Py_FinalizeEx() }; - } -} diff --git a/pyembed/src/pystr.rs b/pyembed/src/pystr.rs deleted file mode 100644 index 27e716b..0000000 --- a/pyembed/src/pystr.rs +++ /dev/null @@ -1,98 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Bridge Rust and Python string types. - -use libc::{c_void, size_t, wchar_t}; -use python3_sys as pyffi; -use std::ffi::{CString, OsString}; -use std::ptr::null_mut; - -#[cfg(target_family = "unix")] -use std::os::unix::ffi::OsStrExt; -#[cfg(target_family = "windows")] -use std::os::windows::prelude::OsStrExt; - -use cpython::{PyObject, Python}; - -#[derive(Debug)] -pub struct OwnedPyStr { - data: *const wchar_t, -} - -impl OwnedPyStr { - pub fn as_wchar_ptr(&self) -> *const wchar_t { - self.data - } - - pub fn from_str(s: &str) -> Result { - // We need to convert to a C string so there is a terminal NULL - // otherwise Py_DecodeLocale() can get confused. - let cs = CString::new(s).or_else(|_| Err("source string has NULL bytes"))?; - - let size: *mut size_t = null_mut(); - let ptr = unsafe { pyffi::Py_DecodeLocale(cs.as_ptr(), size) }; - - if ptr.is_null() { - Err("could not convert str to Python string") - } else { - Ok(OwnedPyStr { data: ptr }) - } - } -} - -impl Drop for OwnedPyStr { - fn drop(&mut self) { - unsafe { pyffi::PyMem_RawFree(self.data as *mut c_void) } - } -} - -#[cfg(target_family = "unix")] -const SURROGATEESCAPE: &[u8] = b"surrogateescape\0"; - -#[cfg(target_family = "unix")] -pub fn osstring_to_str(py: Python, s: OsString) -> Result { - // PyUnicode_DecodeLocaleAndSize says the input must have a trailing NULL. - // So use a CString for that. - let b = CString::new(s.as_bytes()).or_else(|_| Err("not a valid C string"))?; - unsafe { - let o = pyffi::PyUnicode_DecodeLocaleAndSize( - b.as_ptr() as *const i8, - b.to_bytes().len() as isize, - SURROGATEESCAPE.as_ptr() as *const i8, - ); - - Ok(PyObject::from_owned_ptr(py, o)) - } -} - -#[cfg(target_family = "windows")] -pub fn osstring_to_str(py: Python, s: OsString) -> Result { - // Windows OsString should be valid UTF-16. - let w: Vec = s.encode_wide().collect(); - unsafe { - Ok(PyObject::from_owned_ptr( - py, - pyffi::PyUnicode_FromWideChar(w.as_ptr(), w.len() as isize), - )) - } -} - -#[cfg(target_family = "unix")] -pub fn osstring_to_bytes(py: Python, s: OsString) -> PyObject { - let b = s.as_bytes(); - unsafe { - let o = pyffi::PyBytes_FromStringAndSize(b.as_ptr() as *const i8, b.len() as isize); - PyObject::from_owned_ptr(py, o) - } -} - -#[cfg(target_family = "windows")] -pub fn osstring_to_bytes(py: Python, s: OsString) -> PyObject { - let w: Vec = s.encode_wide().collect(); - unsafe { - let o = pyffi::PyBytes_FromStringAndSize(w.as_ptr() as *const i8, w.len() as isize * 2); - PyObject::from_owned_ptr(py, o) - } -} diff --git a/pyoxidizer.toml b/pyoxidizer.toml deleted file mode 100644 index c6a490d..0000000 --- a/pyoxidizer.toml +++ /dev/null @@ -1,119 +0,0 @@ -# This file controls the PyOxidizer build configuration. See the -# pyoxidizer crate's documentation for extensive documentation -# on this file format. - -[[build]] -application_name = "insee_translator" - -[[embedded_python_config]] -raw_allocator = "jemalloc" - -# dont_write_bytecode = true -# ignore_environment = true -# no_site = true -# no_user_site_directory = true -# optimize_level = 0 -# stdio_encoding = "utf-8:strict" -# unbuffered_stdio = false -#write_modules_directory_env = "PYOXIDIZER_WRITE_MODULES_DIR" - -# Windows doesn't support jemalloc. -[[embedded_python_config]] -build_target = "x86_64-pc-windows-msvc" -raw_allocator = "system" - -[[packaging_rule]] -type = "stdlib-extensions-policy" - -# Package all available extension modules from the Python distribution. -# The Python interpreter will be fully featured. -policy = "all" - -# Only package the minimal set of extension modules needed to initialize -# a Python interpreter. Many common packages in Python's standard -# library won't work with this setting. -# policy = "minimal" - -# Only package extension modules that don't require linking against -# non-Python libraries. e.g. will exclude support for OpenSSL, SQLite3, -# other features that require external libraries. -# policy = "no-libraries" - -# Explicit list of extension modules from the distribution to include. -# [[packaging_rule]] -# type = "stdlib-extensions-explicit-includes" -# includes = ["binascii", "errno", "itertools", "math", "select", "_socket"] - -# Explicit list of extension modules from the distribution to exclude. -# [[packaging_rule] -# type = "stdlib-extensions-explicit-excludes" -# excludes = ["_ssl"] - -# Package the entire Python standard library without sources. -[[packaging_rule]] -type = "stdlib" -include_source = false - -# Write out license files next to the produced binary. -[[packaging_rule]] -type = "write-license-files" -path = "" - -# Package .py files discovered in a local directory. -[[packaging_rule]] -type = "package-root" -path = "." -packages = ["data", "main"] - -# Package things from a populated virtualenv. -# [[packaging_rule]] -# type = "virtualenv" -# path = "/path/to/venv" - -# Filter all resources collected so far through a filter of names -# in a file. -# [[packaging_rule]] -# type = "filter-include" -# files = ["/path/to/filter-file"] - -# How Python should run by default. This is only needed if you -# call ``run()``. For applications customizing how the embedded -# Python interpreter is invoked, this section is not relevant. -[[embedded_python_run]] -# Run an interactive Python interpreter. -#mode = "repl" - -# Import a Python module and run it. -mode = "module" -module = "main.main" - -# Evaluate some Python code. -#mode = "eval" -#code = "import main; main.main()" - -# END OF COMMON USER-ADJUSTED SETTINGS. -# -# Everything below this is typically managed by PyOxidizer and doesn't need -# to be updated by people. - -[[python_distribution]] -build_target = "x86_64-apple-darwin" -url = "https://github.com/indygreg/python-build-standalone/releases/download/20190617/cpython-3.7.3-macos-20190618T0523.tar.zst" -sha256 = "6668202a3225892ce252eff4bb53a58ac058b6a413ab9d37c026a500c2a561ee" -[[python_distribution]] -build_target = "x86_64-pc-windows-msvc" -url = "https://github.com/indygreg/python-build-standalone/releases/download/20190617/cpython-3.7.3-windows-amd64-20190618T0516.tar.zst" -sha256 = "fd43554b5654a914846cf1c251d1ad366f46c7c4d20b7c44572251b533351221" -[[python_distribution]] -build_target = "x86_64-unknown-linux-gnu" -url = "https://github.com/indygreg/python-build-standalone/releases/download/20190617/cpython-3.7.3-linux64-20190618T0324.tar.zst" -sha256 = "d6b80a9723c124d6d193f8816fdb874ba6d56abfb35cbfcc2b27de53176d0620" -[[python_distribution]] -build_target = "x86_64-unknown-linux-musl" -url = "https://github.com/indygreg/python-build-standalone/releases/download/20190617/cpython-3.7.3-linux64-musl-20190618T0400.tar.zst" -sha256 = "2be2d109b82634b36685b89800887501b619ef946dda182e5a8ab5c7029a8136" - - -[[pyoxidizer]] -version = "0.2.0" -commit = "" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9428b6c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,16 @@ +[tool.poetry] +name = "insee_number_translator" +version = "0.1.0" +description = "Translate french INSEE number to meaningful data" +authors = ["Gabriel Augendre "] +license = "MIT" + +[tool.poetry.dependencies] +python = "^3.9" + +[tool.poetry.dev-dependencies] +pre-commit = "^2.13.0" + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index c0c09b9..0000000 --- a/src/main.rs +++ /dev/null @@ -1,30 +0,0 @@ -use pyembed::{default_python_config, MainPythonInterpreter}; - -fn main() { - // The following code is in a block so the MainPythonInterpreter is destroyed in an - // orderly manner, before process exit. - let code = { - // Load the default Python configuration as derived by the PyOxidizer config - // file used at build time. - let config = default_python_config(); - - // Construct a new Python interpreter using that config, handling any errors - // from construction. - match MainPythonInterpreter::new(config) { - Ok(mut interp) => { - // And run it using the default run configuration as specified by the - // configuration. If an uncaught Python exception is raised, handle it. - // This includes the special SystemExit, which is a request to terminate the - // process. - interp.run_as_main() - } - Err(msg) => { - eprintln!("{}", msg); - 1 - } - } - }; - - // And exit the process according to code execution results. - std::process::exit(code); -}