Compare commits

...

17 commits

10 changed files with 178 additions and 55 deletions

1
.gitignore vendored
View file

@ -221,3 +221,4 @@ Temporary Items
/target
**/*.rs.bk
.direnv

2
.mise.toml Normal file
View file

@ -0,0 +1,2 @@
[tools]
python = {version="3.11", virtualenv=".venv"}

View file

@ -19,15 +19,9 @@ repos:
args:
- --markdown-linebreak-ext=md
- repo: https://github.com/golangci/golangci-lint
rev: v1.42.0
rev: v1.54.2
hooks:
- id: golangci-lint
- repo: https://github.com/TekWizely/pre-commit-golang
rev: v1.0.0-beta.4
hooks:
- id: go-fumpt
args:
- -w
- repo: https://github.com/PyCQA/isort
rev: 5.9.3
hooks:

39
LICENSE
View file

@ -1,24 +1,25 @@
DISCLAIMER: The files under "data/raw_data" are not covered by this license as they
were not created by the author of this software.
This is free and unencumbered software released into the public domain.
MIT License
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
Copyright (c) 2021 Gabriel Augendre
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
For more information, please refer to <https://unlicense.org>
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -95,3 +95,6 @@ inv release <version_name>
# Data sources
* https://www.insee.fr/fr/information/2560452, Millésime 2021 : Téléchargement des fichiers, CSV
# Reuse
If you do reuse my work, please consider linking back to this repository 🙂

View file

@ -1,3 +1,5 @@
// Package data is used to parse the insee number into a usable and human-readable data structure.
// All the required data is embedded in the package so no external file is required.
package data
import (
@ -10,9 +12,14 @@ import (
)
// Default values used when filling in an InseeData.
var (
	// Male is one of the two possible values for InseeData.Gender.
	Male = "homme"
	// Female is the other possible value for InseeData.Gender.
	Female = "femme"
	// Unknown is the value used when a string field of InseeData could not be determined.
	Unknown = "inconnu(e)"
	// France is the only item in the default slice for InseeData.Countries.
	France = "FRANCE"
)
//go:embed curated_data/countries.json
@ -24,25 +31,46 @@ var rawDepartments []byte
//go:embed curated_data/cities.json
var rawCities []byte
// InseeData contains human-readable data about the insee number used to construct it.
type InseeData struct {
InseeNumber string `json:"insee_number"`
Gender string `json:"gender"`
Year int `json:"year"`
Month time.Month `json:"month"`
Department string `json:"department"`
City string `json:"city"`
CityCode string `json:"city_code"`
Foreign bool `json:"foreign"`
Countries []string `json:"countries"`
CountryCode string `json:"country_code"`
Continent string `json:"continent"`
OrderOfBirth int `json:"order_of_birth"`
ControlKey int `json:"control_key"`
// InseeNumber is the raw number, as given.
InseeNumber string `json:"insee_number"`
// Gender is either Male or Female.
Gender string `json:"gender"`
// Year of birth.
Year int `json:"year"`
// Month of birth.
Month time.Month `json:"month"`
// Department of birth, represented with its name.
Department string `json:"department"`
// City of birth, represented with its name.
City string `json:"city"`
// CityCode is the INSEE code of the City of birth.
CityCode string `json:"city_code"`
// Foreign is false if the person is born in France, true otherwise.
Foreign bool `json:"foreign"`
// Countries is the list of country names matching the CountryCode.
// Some country codes may match multiple countries, so Countries is a slice.
// This is always set to `{"FRANCE"}` when Foreign is false.
Countries []string `json:"countries"`
// CountryCode is the code of the birth country.
CountryCode string `json:"country_code"`
// Continent of birth.
Continent string `json:"continent"`
// OrderOfBirth is the order of birth of the person in the city or country (if Foreign) of birth at the year/month of birth.
// For example, 384 would mean that the person is the 384th born in the specific city/country on the given year/month.
OrderOfBirth int `json:"order_of_birth"`
// ControlKey is the complement to 97 of the insee number (minus the last two digits) modulo 97.
ControlKey int `json:"control_key"`
}
// NewInseeData generates an InseeData struct, extracting the data into the relevant fields.
// The data is converted to a human-readable format before being stored.
// If a value can't be determined, the corresponding field is generally set to Unknown.
// It returns an error when the given number isn't 15 characters long.
func NewInseeData(inseeNumber string) (*InseeData, error) {
if len(inseeNumber) != 15 {
return nil, fmt.Errorf("le numéro INSEE number must contain 15 characters")
return nil, fmt.Errorf("le numéro INSEE doit contenir 15 caractères")
}
num := inseeNumber
departmentCode := num[5:7]
@ -57,7 +85,7 @@ func NewInseeData(inseeNumber string) (*InseeData, error) {
}
var city string
var department string
countries_ := []string{"FRANCE"}
countries_ := []string{France}
countryCode := ""
continent := "Europe"
foreign := (dep >= 91 && dep <= 96) || dep == 99
@ -118,6 +146,9 @@ func NewInseeData(inseeNumber string) (*InseeData, error) {
}, nil
}
// IsValid returns true when the insee number is valid and false when not.
// The insee number is valid when it matches its ControlKey.
// It returns an error when the insee number can't be converted to an integer.
func (insee InseeData) IsValid() (bool, error) {
r := strings.NewReplacer(
"2A", "19",
@ -132,6 +163,7 @@ func (insee InseeData) IsValid() (bool, error) {
return code == insee.ControlKey, nil
}
// String returns a string representation of the InseeData in a human-readable format, suited for printing to stdout.
func (insee InseeData) String() string {
var result []string
result = append(result, insee.InseeNumber)

View file

@ -171,3 +171,13 @@ func TestNewInseeData_ValidFrenchCorsica(t *testing.T) {
assert.Equal([]string{"FRANCE"}, insee.Countries)
assert.Equal(23, insee.ControlKey)
}
// inseeResult receives the last value produced by BenchmarkNewInseeData, so the
// result of the benchmarked call is stored at package level instead of being discarded.
var inseeResult *InseeData

// BenchmarkNewInseeData measures NewInseeData on a fixed 15-character insee number.
func BenchmarkNewInseeData(b *testing.B) {
	var last *InseeData
	for n := 0; n < b.N; n++ {
		last, _ = NewInseeData("299122A00498723")
	}
	inseeResult = last
}

View file

@ -12,7 +12,7 @@ func main() {
flag.Usage = func() {
out := flag.CommandLine.Output()
fmt.Fprintf(out, "Usage: %s [flags] [numero_insee...]\n", os.Args[0])
fmt.Fprintf(out, "\nCe programme décode les informations contenues dans votre numéro INSEE (numéro de sécurité sociale français)")
fmt.Fprintf(out, "\nCe programme décode les informations contenues dans votre numéro INSEE (numéro de sécurité sociale français) ")
fmt.Fprintf(out, "et vous les affiche d'une manière lisible et claire.\n")
flag.PrintDefaults()
fmt.Fprintf(out, "\nLes arguments numero_insee doivent comporter 15 caractères. Il est possible d'en spécifier plusieurs séparés par un espace.\n")

View file

@ -1 +1,2 @@
invoke
requests

101
tasks.py
View file

@ -1,11 +1,15 @@
import os
import re
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import List
import requests
from invoke import Context, task
TARGETS = [
"darwin/amd64",
"darwin/arm64",
"freebsd/386",
"freebsd/amd64",
"freebsd/arm",
@ -19,50 +23,119 @@ TARGETS = [
"windows/arm",
]
# Directory containing this tasks file; strict=True makes resolution fail if the path does not exist.
BASE_DIR = Path(__file__).parent.resolve(strict=True)
# Directory where the build and compress tasks write binaries and archives.
DIST_DIR = BASE_DIR / "dist"
# Gitea API token read from the environment; None when the variable is unset.
GITEA_TOKEN = os.getenv("GITEA_TOKEN")
@task
def test(context: Context):
    """Run the Go test suite with the race detector enabled.

    The command runs from ``BASE_DIR`` so that ``./...`` covers every package
    of the repository, wherever ``inv`` was started from.
    """
    with context.cd(BASE_DIR):
        # Flags come before the package pattern: a bare argument placed after
        # a flag is handed to the test binaries instead of being read as a
        # package, so the former trailing "." was only noise.
        context.run("go test -race ./...", echo=True)
@task(pre=[test])
def release(context, version_name):
@task
def clean(context: Context):
    """Remove the dist directory and everything it contains."""
    command = f"rm -rf {DIST_DIR}"
    context.run(command, echo=True)
@task(pre=[clean, test], post=[clean])
def release(context: Context, version_name):
    """Create & push git tag + build, compress and upload binaries.

    ``clean`` and ``test`` are declared as pre-tasks and ``clean`` as a
    post-task, so the dist directory is emptied both before the build and
    after the upload.
    """
    tag(context, version_name)
    # Each step feeds the next one: binaries -> archives -> uploaded assets.
    binaries = build(context, version_name)
    archives = compress(context, binaries)
    upload(context, version_name, archives)
@task(pre=[test])
def tag(context: Context, version_name):
    """Create & push an annotated git tag.

    The tag name and its message are both the version name, normalized by
    ``fix_version_name``.
    """
    version_name = fix_version_name(version_name)
    context.run(f"git tag -a {version_name} -m '{version_name}'", echo=True)
    context.run("git push --follow-tags", echo=True)
@task
def build(context: Context, version_name):
    """Cross-platform build: one binary per entry of TARGETS.

    The ``go build`` commands are submitted to a thread pool, each with its
    own ``GOOS``/``GOARCH`` environment, and write their output to DIST_DIR.

    Returns:
        The list of binary paths, in TARGETS order.

    Raises:
        The first exception raised by a build command, once every build has
        finished.
    """
    version_name = fix_version_name(version_name)
    binaries = []
    pending = []
    with ThreadPoolExecutor() as pool:
        for target in TARGETS:
            # Named goos/goarch so the imported ``os`` module is not shadowed.
            goos, goarch = target.split("/")
            binary_name = f"insee-{version_name}-{goos}-{goarch}"
            if goos == "windows":
                binary_name += ".exe"
            binary_path = DIST_DIR / binary_name
            binaries.append(binary_path)
            pending.append(
                pool.submit(
                    context.run,
                    f"go build -o {binary_path}",
                    env={"GOOS": goos, "GOARCH": goarch},
                    echo=True,
                )
            )
    # Leaving the ``with`` block waits for every build. An exception raised in
    # a worker is stored on its future and lost unless result() is called.
    for future in pending:
        future.result()
    return binaries
@task
def compress(context: Context, binaries):
    """Compress binaries to .tar.gz, one archive per binary, inside DIST_DIR.

    Args:
        binaries: path objects exposing a ``name`` attribute (only the file
            name is used; the files are looked up in DIST_DIR).

    Returns:
        The list of archive paths, in the same order as ``binaries``.

    Raises:
        The first exception raised while compressing, once every job has
        finished.
    """
    archives = []
    pending = []
    with ThreadPoolExecutor() as pool:
        for binary in binaries:
            binary_name = binary.name
            archive_path = DIST_DIR / f"{binary_name}.tar.gz"
            archives.append(archive_path)
            pending.append(
                pool.submit(
                    _compress_single_binary, context, archive_path, binary_name
                )
            )
    # result() re-raises worker exceptions that would otherwise be discarded.
    for future in pending:
        future.result()
    return archives
def _compress_single_binary(context, archive_path, binary_name):
    """Archive one binary from DIST_DIR into archive_path, then remove the binary.

    The ``rm`` is chained with ``&&`` so it only runs when ``tar`` succeeded.
    """
    command = f"tar czf {archive_path} {binary_name} && rm {binary_name}"
    with context.cd(DIST_DIR):
        context.run(command, echo=True)
@task
def upload(ctx: Context, version_name, upload_files):
    """Create a draft release and attach every file of upload_files to it.

    The release is created as a draft named and tagged after the normalized
    version name. Files are then posted concurrently to the release's
    ``/assets`` endpoint.

    Raises:
        ValueError: when the GITEA_TOKEN environment variable is not set.
        requests.HTTPError: when creating the draft release fails.
    """
    # Fail fast, before anything is sent over the network.
    if not GITEA_TOKEN:
        raise ValueError("You need to set the GITEA_TOKEN env var before uploading")
    version_name = fix_version_name(version_name)
    session = requests.Session()
    session.headers["Authorization"] = f"token {GITEA_TOKEN}"
    url = "https://git.augendre.info/api/v1/repos/gaugendre/insee_number_translator/releases"
    resp = session.post(
        url, json={"name": version_name, "tag_name": version_name, "draft": True}
    )
    resp.raise_for_status()
    release_data = resp.json()
    html_url = release_data.get("html_url")
    print(f"The draft release has been created at {html_url}")
    api_url = release_data.get("url") + "/assets"
    pending = []
    with ThreadPoolExecutor() as pool:
        for upload_file in upload_files:
            future = pool.submit(post_attachment, api_url, upload_file, session)
            pending.append((upload_file, future))
    # An exception raised in a worker (missing file, connection error, ...)
    # is stored on its future; report it instead of letting it disappear.
    for upload_file, future in pending:
        error = future.exception()
        if error is not None:
            print(f"Upload failed for {upload_file}: {error!r}")
    print(f"All uploads are finished. Update & publish your draft: {html_url}")
def post_attachment(api_url, upload_file, session):
    """Upload one file to api_url as a multipart ``attachment``.

    Args:
        api_url: assets endpoint of the release.
        upload_file: path of the file to send; its file name becomes the
            ``name`` query parameter.
        session: object whose ``post`` method sends the request (a
            ``requests.Session`` in this file).

    A rejected upload does not raise: any status other than 201 is reported
    on stdout together with the response body.
    """
    upload_file = Path(upload_file)
    name = upload_file.name
    print(f"Uploading {name}...")
    with open(upload_file, "rb") as f:
        # ``params`` lets the HTTP library URL-encode the name instead of
        # pasting it raw into the query string.
        res = session.post(api_url, params={"name": name}, files={"attachment": f})
    status_code = res.status_code
    if status_code != 201:
        try:
            details = res.json()
        except ValueError:
            # The error body is not guaranteed to be JSON; fall back to the
            # raw text so the status is still reported.
            details = res.text
        print(f"Status != 201 for {name}: {status_code} {details}")
@task
def pre_process(context: Context):
"""Pre-process raw data into JSON"""
files_to_rename = {
r"commune.*\.csv": "commune.csv",
@ -78,3 +151,9 @@ def pre_process(context):
with context.cd(BASE_DIR):
context.run("go run ./pre_process")
def fix_version_name(version_name: str):
    """Return version_name with a leading "v", adding one when it is missing."""
    return version_name if version_name.startswith("v") else f"v{version_name}"