From 304d585c45291e4381feb6a87a18720351c3d438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Mart=C3=ADnez?= <49537445+JasterV@users.noreply.github.com> Date: Wed, 10 Dec 2025 20:59:33 +0100 Subject: [PATCH] Merge pull request #6 from JasterV/feat/rcmap-rust-post post: Implementing a reference-count map in Rust --- codebook.toml | 1 + public/favicon.png | Bin 462 -> 1328 bytes .../blog/DecouplingElixirGenServers.md | 2 - src/content/blog/RcMapRust.md | 336 ++++++++++++++++++ src/content/blog/assets/wizard.png | Bin 0 -> 2762 bytes src/layouts/BaseLayout.astro | 1 + 6 files changed, 338 insertions(+), 2 deletions(-) create mode 100644 src/content/blog/RcMapRust.md create mode 100644 src/content/blog/assets/wizard.png diff --git a/codebook.toml b/codebook.toml index ca9ef15..6ef2564 100644 --- a/codebook.toml +++ b/codebook.toml @@ -3,4 +3,5 @@ words = [ "craftzdog", "martínez", "neovim", + "rustacean", ] diff --git a/public/favicon.png b/public/favicon.png index 11a2e5eae56b9045af89a8eb8e05f88b063432f8..04c7d2abbe781c03371f3047f3a8aba8a534949e 100644 GIT binary patch literal 1328 zcmV-01<(44P)k7RCt{2m}^WFR}{xLCN@n!_@zzS1~h72BlLqso-?~V))uA3 ziW-b9+E5<710wW+Slfz7V-rIJ$_#~)1$W@+JyV zG*edopYh5iY<}fK03iW9tOyVwWyL9*s^T+sP5j2-rbiGIP2 zC=G8JS{K|DLC0>?9t@BHR%s8EsUY5L@J77s8H}k`oC%ofvjhdFllIW?3cR(
X$%YQMf$RRQ1_Tq29ei7`RXp>+7u8V$p?aJvt78^Baj;0Z*hDqWXj@vqK8 zcD&dGH#!p>0GCgsz_wVEM_I^ZTc1-D0O`mEcS64Z{1pcPF3b0(jH(p{;26^2PDt0m z4A{5D2vyt4!Tj;tWB%K@rU5KfIdJnrJpA3qz|S3PV1L@DqyOVuD757M*!UC2&BhwrYhGV?yqn4@X@Pv)7S%KbTgA(ei5G$`Q?+Ycc1Fq{SI-r zdt*I1g(hooh{fH00YFja6xU$Kb``_%!acBi!x6YpmubIFc+v+t_`HYu0QX=c4{r@` zeahzm9D|MC9G~to;o6By)2hcQBXgP`0LTy10r18VgHQTYJc@03!rk59Z_bcGpMJ4+ zcmDg`b>`*117Jv@(g27dIX(oyrxgca@$qR3?-j1B7kTNovn72)M(!IZeAxn7I{%5z z))Od7vOsNiH_(_gHq|g?04s0%#VXjq*}Ka70q?#94ewk8BkF?E&F7$aodrHfJOc&s z9gr8-ZuhNvvqKpDEkCF4(07a1e+^}u&w`Qbg2sFi%zG|Dceyks)~HYYbEC*h*WFC0 zsEdMhZR#1Qv?YnWZ9p~gylp@fq_vvqSXaf>c~+r6)hb90O2CNr7*Yd`Nn>k#X;1`n z#LB)UBHzEkDoEQ!!B!#iQnQtpI<36zGEKLA)&5(#droC=&*}L!`Ys(K@=`OM!^%tf mbWS=qjp0+#vo>r0bNd%cI>O@tIS}ap0000Px$h)G02R5(xVRIyS5F%W%eWUfD&z?_A>XTdqzGg3mM7Km8;3Eq|X3w(hVhZUvc z6TYfnO7VaA?ZFARrds zPQ6sn=b}*wBwAEIfr(2of*sCAz~XBIgi&%-2{2jCK|38juAk;<_W9u~RSXR_jQLy? z^ayM(kOT-wGm=H8t~#VUB)sM&zZtyixEuggE@A@cwlO-)B-8V0Gw(G+8ouD>=oR=`wbYi`=OS+9 z0AVJ{T}+#DCn8T_hJN^FwX+mp@Hm!K2R_^}#eCjc3W*86g9 znOl2=OlO(Nx}i#D+Q)jGren=y2ZQ`kS${mV(HxJ;FR>$_8FbBRl>h($07*qoM6N<$ Ef)IGfzyJUM diff --git a/src/content/blog/DecouplingElixirGenServers.md b/src/content/blog/DecouplingElixirGenServers.md index b7289bf..94e3768 100644 --- a/src/content/blog/DecouplingElixirGenServers.md +++ b/src/content/blog/DecouplingElixirGenServers.md @@ -488,11 +488,9 @@ We've also seen how tests become simpler due to the fact that each test suite be There are some drawbacks though: - Debugging becomes harder. - - You are introducing an extra layer of asynchronous broadcast messaging and therefore your elixir processes become harder to debug. - Event delivery is not ensured. - - If a subscriber goes down, Phoenix PubSub won't try to re-deliver lost events once the process restarts. - Depending on your situation you might want to implement a more complex pub-sub system that does its best to ensure event delivery. 
diff --git a/src/content/blog/RcMapRust.md b/src/content/blog/RcMapRust.md new file mode 100644 index 0000000..adbe2f8 --- /dev/null +++ b/src/content/blog/RcMapRust.md @@ -0,0 +1,336 @@ +--- +title: "Implementing a reference-count map in Rust" +description: "A reference-count map is a data structure that keeps track of how many references to an entry exist and drops it when the last reference drops." +pubDate: 2025-12-09 +image: "./assets/wizard.png" +--- + +I came up with the term `reference-count map` for a map data structure that uses a reference counting system to know when to automatically clean up an entry. + +In this post I'd like to share how I implemented such a data structure in Rust. + +It is going to be very simple and could for sure be improved and contain more features but I wanted to keep it simple for my own needs. + +With all of this said, let's start! + +## First of all, why? + +I wanted to experiment with the idea of implementing an `Event bus` in Rust. + +Lately I've been working a lot with Elixir and the popular library Phoenix PubSub. + +This library offers a very simple API which allows developers to simply call `PubSub.subscribe(topic_name)` to subscribe to a topic and `PubSub.publish(topic_name, data)` to publish to it. + +I wanted to implement a type that provided the same API in Rust, with the same friendly user experience that `Phoenix PubSub` provides to Elixir developers. + +I wanted this `Event Bus` to internally manage the allocation and deallocation of `topics` in a way that users will never have to worry about it. + +So I needed a way to create `topics` on the fly anytime that a process subscribed to them and to clean them up from memory as soon as all the subscribers are dropped. + +Here I identified a pattern that could be encapsulated as a data structure, one that would be able to keep track of how many references to an entry exist and that +somehow would be able to also react to references getting dropped from memory. 
+ +This is how the idea of creating what I named a `reference-count map` came to life! + +## Designing our data structure + +Let's go step by step; I will try to walk you through the same thought process I had. + +First of all, we need to store key-value pairs in a map, so let's start by defining our `RcMap`: + +```rust +pub struct RcMap<K, V> { + inner: HashMap<K, V>, +} +``` + +Now we need a way to keep track of how many references exist. Let's keep it simple and update the map to include a counter along with the stored value: + +```rust +pub struct RcMap<K, V> { + inner: HashMap<K, (usize, V)>, +} +``` + +Let's implement the basic functionality we can think of right now: + +```rust +impl<K, V> RcMap<K, V> +where + K: Hash + Eq, + V: Clone, +{ + pub fn new() -> Self { + Self { + inner: HashMap::new(), + } + } + + pub fn get(&mut self, key: K) -> Option<V> { + let maybe = self.inner.get(&key).cloned(); + + match maybe { + Some((count, value)) => { + self.inner.insert(key, (count + 1, value.clone())); + Some(value) + } + None => None, + } + } + + pub fn insert(&mut self, key: K, value: V) -> V { + let _ = self.inner.insert(key, (1, value.clone())); + value + } +} +``` + +What are we doing here? + +- When inserting a new key-value pair we insert it with a reference count of 1 and return it. + +- Whenever we fetch an entry, we increment the reference count by 1. + +But there is a problem: + +- When we get a value we increment the reference counter, but how will the counter be decremented? How can we track that the value that was fetched gets dropped? + +### Implementing the `ObjectRef` + +We need a way to encapsulate the values we return to the caller in a way that when they get dropped, two things happen: + +- The reference count gets decremented by 1. +- If the count hits 0, we clean up the entry from the map. + +To implement this behavior I created a type named `ObjectRef`, which sounds generic enough to me for its purpose. 
+ +The mentioned behavior of an `ObjectRef` naturally implies that it will need write access to the internal hashmap. + +To be able to do that, we need 2 things: + +- A synchronization mechanism so that the map can be safely updated from multiple places at a time. + +- A way to share the ownership of the map, otherwise it won't be possible for the map to be owned by `RcMap` and `ObjectRef` at the same time. + +For an experienced Rustacean this pattern will probably sound familiar... and exactly! What we need here is something like: + +```rust +Arc<RwLock<HashMap<K, V>>> +``` + +As you might know, an `Arc` allows us to share the ownership of a value in a read-only way (no mutable references are allowed). + +A `RwLock` is a concurrency primitive that allows us to hold a write lock to a value so we are sure that the map can only be updated from 1 place at a time. + +Most importantly, `RwLock` implements the interior mutability pattern, which means that we can mutate the inner value (while holding the write lock) without needing a mutable reference to the lock itself. + +This is important because thanks to the combination of `Arc` and `RwLock`, the 2 conditions we've mentioned are met! + +#### Using DashMap instead of `RwLock<HashMap<K, V>>` + +The need to have a thread-safe map that allows users to safely read/write entries from multiple places at a time is a very common pattern in Rust. + +Some time ago I learned about [DashMap](https://docs.rs/dashmap/latest/dashmap/struct.DashMap.html), a concurrent hashmap whose primary goal is to be a direct replacement for `RwLock<HashMap<K, V>>`! + +When working with a `DashMap` you don't have to worry about asking for a `write` lock or handling cases such as the internal lock being `poisoned`: +all of the synchronization is handled internally and the API that is offered to you is very simple and friendly while still getting all the benefits of a fully +concurrent and read-write safe hashmap. 
+ +**Beware of deadlocks** + +`DashMap`s are awesome but you still need to know how to use them properly! + +If you try to perform a write operation while still holding a read reference into the map, your `DashMap` will silently deadlock instead of panicking or returning an error. + +```rust +{ + let topic = map.get(topic_name).unwrap(); + // This will cause a deadlock! + let _ = map.remove(topic_name); +} +``` + +#### Redefining RcMap + +Now the definition of our `RcMap` should look like this: + +```rust +pub struct RcMap<K, V> { + inner: Arc<DashMap<K, (usize, V)>>, +} +``` + +#### Defining `ObjectRef` + +Now that we know the purpose of an `ObjectRef`, let's get into the internals: + +```rust +pub struct ObjectRef<K, V> +where + K: Hash + Eq, +{ + parent_ref: Weak<DashMap<K, (usize, V)>>, + key: K, + value: V, +} +``` + +If you are not familiar with it, `Weak` is a version of `Arc` that holds a non-owning reference to the data. +A `Weak` pointer can be created from an `Arc` by "downgrading" it (see [documentation](https://doc.rust-lang.org/std/sync/struct.Arc.html#method.downgrade)) and it will not increase the strong reference count. +This way, the `RcMap` is kept as the real owner of the `Arc` instead of the `ObjectRef`s. + +#### Implementing the drop behavior + +Now we can get into the real deal: how to automatically clean up map entries! + +```rust +impl<K, V> Drop for ObjectRef<K, V> +where + K: Hash + Eq, +{ + fn drop(&mut self) { + let Some(map) = self.parent_ref.upgrade() else { + return; + }; + + map.alter(&self.key, |_, (count, value)| (count - 1, value)); + map.remove_if(&self.key, |_, (count, _)| *count <= 0); + } +} +``` + +The implementation is fairly simple as the `DashMap` API offers a pleasantly clean way of altering and removing values. + +First we need to [upgrade](https://doc.rust-lang.org/std/sync/struct.Weak.html#method.upgrade) the weak reference to an `Arc`. + +Now we see why the `key` is also stored in the object ref: we can use it inside the `drop` implementation to update the inner map. 
+ +If the map was already dropped, `upgrade` returns `None` and `drop` does nothing. + +If the original map is not yet dropped, it can be updated. + +So, first we decrease the reference count by 1. + +Then, if the `count` is less than or equal to 0 we simply remove the entry, easy! + +#### Reimplementing RcMap + +Now we have an object that we can return whenever someone wants to get a value, and we know that this object will take care of deallocating map entries "on-drop". + +Let's see how to reimplement our `RcMap` to take advantage of `DashMap` and `ObjectRef`. + +```rust +impl<K, V> RcMap<K, V> +where + K: Hash + Eq + Clone, + V: Clone, +{ + pub fn new() -> Self { + Self { + inner: Arc::new(DashMap::new()), + } + } + + pub fn get(&self, key: K) -> Option<ObjectRef<K, V>> { + self.inner + .alter(&key, |_, (count, value)| (count + 1, value)); + + let Some(value_ref) = self.inner.get(&key) else { + return None; + }; + + let (_count, value) = value_ref.value(); + + Some(ObjectRef { + key, + parent_ref: Arc::downgrade(&self.inner), + value: value.clone(), + }) + } + + + pub fn insert(&self, key: K, value: V) -> Result<ObjectRef<K, V>, InsertError<K, V>> { + if let Some(object_ref) = self.get(key.clone()) { + return Err(InsertError::AlreadyExists(key, object_ref)); + } + + let _prev = self.inner.insert(key.clone(), (1, value.clone())); + + Ok(ObjectRef { + key, + parent_ref: Arc::downgrade(&self.inner), + value, + }) + } +} +``` + +The implementation looks quite self-explanatory to me, but there are a few things to point out here. + +First, both the key and value need to be "clone-able", and that makes sense because we need to clone these values from the inner map into the `ObjectRef`. + +We could perhaps use `Arc` to wrap both the key and the value so that they are not required to implement `Clone`, but I was not sure about it so this has simply been an implementation detail I've left this way. 
+ +Second, we see that the `insert` function returns an `InsertError::AlreadyExists` error if the program tries to insert an entry with a key that already exists. + +This is the definition of the error type: + +```rust +pub enum InsertError<K, V> +where + K: Hash + Eq, +{ + AlreadyExists(K, ObjectRef<K, V>), +} +``` + +This check is done for consistency reasons, because an entry must only be removed by the last `ObjectRef` being dropped. + +Otherwise there could be unrelated old `ObjectRef` instances modifying the reference count of a newly inserted entry. + +To prevent this from happening, we enforce that, to insert an entry with an already existing key, +one must wait until all `ObjectRef`s pointing to the current entry are dropped. + +Then, you can see how we return an `ObjectRef` containing the already existing entry along with the error. + +This is not necessary but it can come in handy when working with `RcMap`. + +## Usage example + +```rust +let map = RcMap::new(); + +{ + let inserted_ref = map + .insert("potato", "chair") + .expect("No entry should exist"); + + let obj_ref = map + .get("potato") + .expect("This entry exists"); + + // When this scope ends, all refs are dropped and the entry is removed +} + +let obj_ref = map.get("potato"); + +assert!(obj_ref.is_none()); +``` + +The example above should give a clear idea of how to use our `RcMap` :) + +#### Conclusions + +It has been a very interesting journey to learn how to implement such a data structure. + +We've learned about a thread-safe hash map called `DashMap` which, combined with an `Arc`, gives thread-safe superpowers to our `RcMap`. + +We've also learned about `Weak` and how to use it to safely keep non-owning references to data that may or may not still exist. + +I know that it could be more feature complete with operations such as `remove` or `alter`, the latter probably being more complex as we might decide to also alter all `ObjectRef`s pointing to an entry. 
+ +I kept the implementation as simple as I could to serve the needs of the [event_bus.rs](https://github.com/JasterV/event_bus.rs) crate. + +On a later post I want to talk about this crate and how to use an `RcMap` to implement an `EventBus` in a very simple way. + +If you got here I can't thank you enough, I hope you enjoyed and stay tuned! diff --git a/src/content/blog/assets/wizard.png b/src/content/blog/assets/wizard.png new file mode 100644 index 0000000000000000000000000000000000000000..c8d78b450188d849979a85aa956611cfcb19e9a3 GIT binary patch literal 2762 zcmV;*3N`hKP)RCt{2oO@Ih*B!^N?P+t`COPd%dYUwv#3mx4jctNFW>~}o zc}PX8QKMGl3uh1&H4>j7Di|NB24>M}BC?30q6iAy3rdj~K@IGJMw!8+O-_0elW0wj zMk6BrdhUoK>n!ZTF0+sMob&sKW#`WC`2YNxj8*Cx)LvrC8uo>cY<#L4Drf>z-RsmzPE(k~S4F1u! zC;vdF;@=Jbr#Hhy--0{eWyndj^K~ltpwci~#itSKwcTn1X!!QdfWHrHH6gInU+uh| zf>@RWb1b>um5!Btc8g60(C~YD6HE;#qD|4;$kT}lF6+xP=f9oye+g2MV`?KQuCn73g`M+Ss@XSW~(y^tru1>wo|m|R78XUk#^_1+PtRF1Ci z)=QQQp!0-el>8IqX>crlj$`mudl)eO<$PJp$QQDsl*$VCHY8gf5$OSW9?*++e)bTb z2FDV4_8ab%ou*V;h)>4Dw0}?@4Q1Ryo(9jB%&?dEcVpCmJuvtW?|){g%;3K7eXC@E zuJ6{%$dlkg4n58@uI zeDQe*;+Zz>w5q;24}`jY#pY6vDMS9c+7Zo&619aC3}5?u|SRE|@%N?W{Lv zNeZ+`esJM~NXOIyhO9)M2FH@6)(Z4l*$CMH8V;!iEV#=^0!v0zxZ+D;~vypBBTd`|~r!5z_TQ;GeI&F)rYMlSjNF~6I{aH} zJKvp4S1F4Bea&EaD{`L$`U1QKxjQ_A6RZ_v`&(-j4Y+C= zA6qL(S^7MSO;FOvopv{%HKrhUg$pcs(H1ez=8uD0qCc2&C&8+*2DwJiwx1fXCH^0s zet&LgA!IIC3&vej;LD1kaQ(Cnn$9wCsd@<1@0|!c7N^4O!9||a8S@NYZHoaX4bx%G z#P{3%ZPuV-$W2-SyAoGg{9aseG3;Hj9In)cSgb+ki(Z5L)Mb$JdX_>{RO*e~5ldr# z-j0zFu^$=_jkYMfcyNrO=w`7Nxf}GHeNTx0^|1&=;b~J=uzE4X23ga25%&Xy?B=V*O3P7U9sGv__aHy*nB;X!G*|OaCGxC zH?+`Vhud^E5S9$v-G%Goo3s3mJcir>p2aDSG$3<<@`R1XiL97zT)SijatEyWJ?~5% zhJ$Mtbx9~X&bMU1d-LAuvH=`R{)JqfIHvd+Ym(je<~n()IeB!B%|d-uIn<&7%SPmO z$AA_-u;?k|%5V&cx8~{pvf_@2bn<1ZP;`cC$$Ow&v+?xo^Y8 z)>?3_KFDDP&=Y&eRpERO{gPwI4fCY364uDOS#($HVpW(dy7|cEs}CaK%)SXwv3U-p z&B#!t32JMK@N>u&;aMD|l+fZ~d*Ji3;hpYywMc&ONy#XfKcw|n>s{%D=dbU+0SsP@ z+->$tn+9X<DB2jtKFJ!Zj9+w%bw!t!Z}X2Oc=i?$pqoC2JBB3&$#o 
z_Jb+d%9Ts&7USsA#%`IwkTT>-a4gBTCb28EA#yWOv1zt^%Hcrjl6z-?mJZ)-x8-OW zbf#^KsqJb2LvGObRv=e_W69^8CADy99#m(>z_Q`(hl+I6gEuaKcRLw=S~8)dnm|8G z9lzbZe@`$oxU^4q+g!dIVa=8#?d0hOjTO6N_t%1L$Q7V3O;ytXzJpEq8y_=pu{sRu z_e_$HHJWeAcW3}hF1W;-Q|ObTw)nh*f@4btB4>wVNrT!#@s0{-f2Zi=kb+;^_eHA2 zse)tiDdfy>EDl#&B;Jt*Ft~Y+KYkQBD;z_T)E0?%cwogBKRE>)i;L73h&Z|ytV7NU zJ=Em(2Cz=r56JKLbOM2R$Jc=7i37@hg`AC829Hx)9KMHQMj#GB&IZSlchnY!@3~lr zoB`t|X@E&DctmT5x*{@ z@h?Wb^n*z+fq9gtoPo^PlaM^(D8vPqsxBPg#h4vZ1}U#qf??JvIQsV42lAvrHK1Db zxB(_XELXZA^QcJ|p>#z(id@4Jw5NVMk=KH&3gA-1#+d zB;^bk1+mloWD^AGaF2=)0KH6tbkmxfvuI3OgZxw#C)LB=1+|bBcM3L7uZFZYj>Ecf z#~^i7rCj9~MU+EAc)2W4{NS>CF;mwjV4SYB-DmXwX)KB?hotBVNF7}X>&Jfx>2Dl| zEiu*d6P=e}1iZW+j;%Wfwdq~YqqP<^sbRpi?%cFHg;9{MqMpX6mv*Y1xXDSvs28() z+yGPhc^{)*x}tXSCe_84uG9*WIzQ|)35|a?3F1k$MKGxsC#VLhh~LhL^o9^x4NZdh zz574$;&(=a%NSEcHO9A(q1*z%o z-|7F{{~dY_dM$d*)@yH|WzjNe+4LSxWt@FNZ9OzJG&D3cG&D3cG?2&ee>%EPNpPxW QSO5S307*qoM6N<$g4Ur~Bme*a literal 0 HcmV?d00001 diff --git a/src/layouts/BaseLayout.astro b/src/layouts/BaseLayout.astro index 5ec8db7..5d9793b 100644 --- a/src/layouts/BaseLayout.astro +++ b/src/layouts/BaseLayout.astro @@ -44,6 +44,7 @@ const backgroundImageUrl = `url('${optimizedBgImage.src}')`; >Craftzdog ♥

+

Icons from icons8.com

© 2024 Victor Martínez. All Rights Reserved.