From 7048bb058a128ac617d3c1aacbb5ec5e0186cd81 Mon Sep 17 00:00:00 2001 From: JJJHolscher Date: Tue, 6 Aug 2024 22:45:46 +0200 Subject: [PATCH] readme.md --- README.md | 181 ++++++++++++++++++++++++++++++++++++++++++++++--- pyproject.toml | 2 +- src/store.py | 2 +- 3 files changed, 174 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index f5c7cd7..f2a0347 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,175 @@ # README -## files +A bunch of functionality in here, but the main one is `store.load_or_create`. +It might get its own package someday, when it's less before-pre-alpha-sneakpeak-demo as it's now. -______________________________________________________________________ -|.gitignore |a list of unshared files| -|makefile|dev tools for installing, publisizing etc.| -|pyproject.toml |project metadata| -|requirements.txt |python dependencies| -|setup.py|necessary for `pip install -e .`| -|src/main.py|first file that gets called| --------------------------------------------- +## load_or_create + +### the status quo + +A common pattern is: +```python +def load_obj(path): + "" + +def save_obj(obj, path): + "" + +def create_obj(*args, **kwargs): + "" + +def infer_path(obj_id): + return "./obj/" + str(obj_id) + +obj_id = 0 +path = infer_path(obj_id) +if path.exists(): + obj = load_obj(load) +else: + obj = create_obj(obj_id, some_kwarg=0) + save_obj(obj, path) +``` + +And in some cases, you want to create and save many variations of the object. +It might be better to hash it's characteristics and use that as part of the path. + +```python +import sha256 +import json + +def infer_path(obj_id, **some_other_kwargs): + hash = str(sha256(json.dumps(some_other_kwargs)).hexdigest()) + return "./obj/" + hash + ".pkl" +``` + +### the problem + +The above is fine and dandy, but when someone wants to use your obj, +they'd need to keep track of 4 separate functions. 
+ +You can dress it up as such: +```python +def get_obj(obj_id, *args, **kwargs): + path = infer_path(obj_id) + if path.exists(): + obj = load_obj(path) + else: + obj = create_obj(obj_id, some_kwarg=0) + save_obj(obj, path) + return obj +``` +But that takes a lot of freedom away from your user, who might have their +own ideas on where the object should be stored. + +### the solution + +```python +from jo3util.store import load_or_create +get_obj = load_or_create( + load=load_obj, + save=save_obj, + path_fn=infer_path, +)(create_obj) + +obj = get_obj(obj_id, some_kwarg=0) + +path_of_obj_0 = get_obj.path(obj_id, some_kwarg=0) +path_of_obj_1 = get_obj.path_of_obj(obj) +assert path_of_obj_0 == path_of_obj_1 +``` + +You can now elegantly pack the four functions together. +But you still have the flexibility to alter the path function on the fly: + +```python +get_obj.path_fn = lambda hash: f"./{hash}.pkl" +``` + +Now, storing different objects, one of which depends on the other, becomes intuitive and elegant: + +```python +get_human = load_or_create( + path_fn=lambda name: "./" + name + "/body.txt" +)(lambda name: name) +get_finger_print = load_or_create( + path_fn=lambda finger: get_human.dir_from_obj(human) / f"{finger}.print" +)(lambda human, finger: f"{human}'s finger the {finger}") + +assert not get_human.path("john").exists() +human = get_human("john") +assert get_human.path("john").exists() + +finger_print = get_finger_print(human, "thumb") +assert get_finger_print.path(human, "thumb") == "./john/thumb.print" +``` + +The fingerprint is now always stored in the same directory as the human's `body.txt`. +You don't need to keep track of the location of `body.txt`. + + +### under the hood + +The main trick is to match the parameter names of the `create` function (in our case `create_obj`) +with those of the three other subfunctions (in our case `load_obj`, `save_obj` and `infer_path`). 
+ +The three subfunctions' allowed parameters are mostly a non-strict superset of the create function's +parameters. + +When you call `get_obj`, something like this happens: + +```python +def call_fn_with_filtered_arguments(fn, *args, **kwargs): + """ call fn with only the subset of args and kwargs that fn expects. + """ + path_parameters = get_parameters_that_fn_expects(fn) + # in reality we first infer the args name, for positional arguments. + args = [arg for arg in args if arg in path_parameters] + kwargs = {key: arg for key, arg in kwargs.items() if key in path_parameters} + return fn(*args, **kwargs) + +def get_obj_pseudo_code(*args, **kwargs): + hash = some_hash_fn(*args, **kwargs) + path = call_fn_with_filtered_arguments(infer_path, *args, hash=hash, **kwargs) + if path.exists(): + return call_fn_with_filtered_arguments( + load_obj, + *args, + path=path, + file=open(path, "rb"), + **kwargs + ) + + obj = create_obj(*args, **kwargs) + call_fn_with_filtered_arguments( + save_obj, + obj, + *args, + path=path, + file=open(path, "wb"), + **kwargs + ) + return obj +``` + +So, the load, save and path functions you provide do not have to have the same signature as the create +function does, but you can call them _as if_ they are the create function. + +### philosophy + +The main idea is that some object's storage location should be inferrable from the arguments +during its creation call. + +In reality, we tend to separately keep track of some object's path, its arguments and itself. +This tends to go bad when we need to load, save or create the object in some other context. +It becomes easy to forget where some object ought to be stored. +Or it can happen that different places where the same object is handled have different opinions on the storage location. + +It can lead to duplicates; forgetting where the object was stored; or losing a folder of data +because the folder is too unwieldy to salvage. 
+ +By packaging a function with its load and save counterparts and a default storage location, we don't +need to worry about storage location anymore and can focus on creating and using our objects. + +If we ever do change our minds on the ideal storage location, then there is an obvious central place +where we can change it, and that change then immediately applies to _all_ the places where +some object's path needs to be determined. diff --git a/pyproject.toml b/pyproject.toml index 3cf94dc..540ba49 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "jo3util" -version = "0.0.17" +version = "0.0.18" description = "" dependencies = [] dynamic = ["readme"] diff --git a/src/store.py b/src/store.py index 5b8e4ff..3650ee5 100644 --- a/src/store.py +++ b/src/store.py @@ -96,7 +96,7 @@ class inner(load_or_create): self.save_wrapper(obj, *args, **{"path": path} | kwargs) if "file" in self.save_arg_names: self.hash_obj({"path": path} | kwargs) - if self.save_json: path.with_suffix(".json").write_bytes(self.to_json(**kwargs)) + if self.save_json: path.with_suffix(".kwargs.json").write_bytes(self.to_json(**kwargs)) return obj