@ -1,133 +1,61 @@
use super ::{ DatabaseEngine , Tree } ;
use super ::{ DatabaseEngine , Tree } ;
use crate ::{ database ::Config , Result } ;
use crate ::{ database ::Config , Result } ;
use crossbeam ::channel ::{
bounded , unbounded , Receiver as ChannelReceiver , Sender as ChannelSender , TryRecvError ,
} ;
use parking_lot ::{ Mutex , MutexGuard , RwLock } ;
use parking_lot ::{ Mutex , MutexGuard , RwLock } ;
use rusqlite ::{ Connection , DatabaseName ::Main , OptionalExtension , Params } ;
use rusqlite ::{ Connection , DatabaseName ::Main , OptionalExtension } ;
use std ::{
use std ::{
cell ::RefCell ,
collections ::HashMap ,
collections ::HashMap ,
future ::Future ,
future ::Future ,
ops ::Deref ,
path ::{ Path , PathBuf } ,
path ::{ Path , PathBuf } ,
pin ::Pin ,
pin ::Pin ,
sync ::Arc ,
sync ::Arc ,
time ::{ Duration , Instant } ,
time ::{ Duration , Instant } ,
} ;
} ;
use threadpool ::ThreadPool ;
use tokio ::sync ::oneshot ::Sender ;
use tokio ::sync ::oneshot ::Sender ;
use tracing ::{ debug , warn } ;
use tracing ::{ debug , warn } ;
/// Set of SQLite connections shared by the engine: one writer, a fixed list of
/// permanent readers, and a recycling queue for extra ("spillover") readers.
struct Pool {
/// The connection handed out (locked) by `write_lock`.
writer : Mutex < Connection > ,
/// Permanent reader connections; `read_lock` probes them with `try_lock` first.
readers : Vec < Mutex < Connection > > ,
/// Queue of spillover connections that `read_lock` re-uses when every
/// permanent reader is busy.
spills : ConnectionRecycler ,
/// Reference-count marker: `read_lock` clones it for each spillover connection
/// it hands out and derives the spillover count from `Arc::strong_count` minus
/// the one clone kept here.
spill_tracker : Arc < ( ) > ,
/// Database file location, used when a new spillover connection has to be opened.
path : PathBuf ,
}
pub const MILLI : Duration = Duration ::from_millis ( 1 ) ;
pub const MILLI : Duration = Duration ::from_millis ( 1 ) ;
enum HoldingConn < ' a > {
thread_local! {
FromGuard ( MutexGuard < ' a , Connection > ) ,
static READ_CONNECTION : RefCell < Option < & ' static Connection > > = RefCell ::new ( None ) ;
FromRecycled ( RecycledConn , Arc < ( ) > ) ,
}
}
impl < ' a > Deref for HoldingConn < ' a > {
struct PreparedStatementIterator < ' a > {
type Target = Connection ;
pub iterator : Box < dyn Iterator < Item = TupleOfBytes > + ' a > ,
pub statement_ref : NonAliasingBox < rusqlite ::Statement < ' a > > ,
fn deref ( & self ) -> & Self ::Target {
match self {
HoldingConn ::FromGuard ( guard ) = > guard . deref ( ) ,
HoldingConn ::FromRecycled ( conn , _ ) = > conn . deref ( ) ,
}
}
}
}
/// Both halves of a channel of `Connection`s: field `0` is the sending side
/// (cloned into every `RecycledConn`), field `1` is the receiving side that
/// `try_take` polls for a returned connection.
struct ConnectionRecycler ( ChannelSender < Connection > , ChannelReceiver < Connection > ) ;
impl Iterator for PreparedStatementIterator < ' _ > {
type Item = TupleOfBytes ;
impl ConnectionRecycler {
fn next ( & mut self ) -> Option < Self ::Item > {
fn new ( ) -> Self {
self . iterator . next ( )
let ( s , r ) = unbounded ( ) ;
Self ( s , r )
}
fn recycle ( & self , conn : Connection ) -> RecycledConn {
let sender = self . 0. clone ( ) ;
RecycledConn ( Some ( conn ) , sender )
}
fn try_take ( & self ) -> Option < Connection > {
match self . 1. try_recv ( ) {
Ok ( conn ) = > Some ( conn ) ,
Err ( TryRecvError ::Empty ) = > None ,
// as this is pretty impossible, a panic is warranted if it ever occurs
Err ( TryRecvError ::Disconnected ) = > panic! ( "Receiving channel was disconnected. A a sender is owned by the current struct, this should never happen(!!!)" )
}
}
}
}
}
struct RecycledConn (
struct NonAliasingBox < T > ( * mut T ) ;
Option < Connection > , // To allow moving out of the struct when `Drop` is called.
impl < T > Drop for NonAliasingBox < T > {
ChannelSender < Connection > ,
) ;
impl Deref for RecycledConn {
type Target = Connection ;
fn deref ( & self ) -> & Self ::Target {
self . 0
. as_ref ( )
. expect ( "RecycledConn does not have a connection in Option<>" )
}
}
impl Drop for RecycledConn {
fn drop ( & mut self ) {
fn drop ( & mut self ) {
if let Some ( conn ) = self . 0. take ( ) {
unsafe { Box ::from_raw ( self . 0 ) } ;
debug ! ( "Recycled connection" ) ;
if let Err ( e ) = self . 1. send ( conn ) {
warn ! ( "Recycling a connection led to the following error: {:?}" , e )
}
}
}
}
}
}
impl Pool {
pub struct Engine {
fn new < P : AsRef < Path > > ( path : P , num_readers : usize , total_cache_size_mb : f64 ) -> Result < Self > {
writer : Mutex < Connection > ,
// calculates cache-size per permanent connection
// 1. convert MB to KiB
// 2. divide by permanent connections
// 3. round down to nearest integer
let cache_size : u32 = ( ( total_cache_size_mb * 1024.0 ) / ( num_readers + 1 ) as f64 ) as u32 ;
let writer = Mutex ::new ( Self ::prepare_conn ( & path , Some ( cache_size ) ) ? ) ;
let mut readers = Vec ::new ( ) ;
for _ in 0 .. num_readers {
readers . push ( Mutex ::new ( Self ::prepare_conn ( & path , Some ( cache_size ) ) ? ) )
}
Ok ( Self {
path : PathBuf ,
writer ,
cache_size_per_thread : u32 ,
readers ,
}
spills : ConnectionRecycler ::new ( ) ,
spill_tracker : Arc ::new ( ( ) ) ,
path : path . as_ref ( ) . to_path_buf ( ) ,
} )
}
fn prepare_conn < P : AsRef < Path > > ( path : P , cache_size : Option < u32 > ) -> Result < Connection > {
impl Engine {
let conn = Connection ::open ( path ) ? ;
fn prepare_conn ( path : & Path , cache_size_kb : u32 ) -> Result < Connection > {
let conn = Connection ::open ( & path ) ? ;
conn . pragma_update ( Some ( Main ) , "page_size" , & 32768 ) ? ;
conn . pragma_update ( Some ( Main ) , "journal_mode" , & "WAL" ) ? ;
conn . pragma_update ( Some ( Main ) , "journal_mode" , & "WAL" ) ? ;
conn . pragma_update ( Some ( Main ) , "synchronous" , & "NORMAL" ) ? ;
conn . pragma_update ( Some ( Main ) , "synchronous" , & "NORMAL" ) ? ;
conn . pragma_update ( Some ( Main ) , "cache_size" , & ( - i64 ::from ( cache_size_kb ) ) ) ? ;
if let Some ( cache_kib ) = cache_size {
conn . pragma_update ( Some ( Main ) , "wal_autocheckpoint" , & 0 ) ? ;
conn . pragma_update ( Some ( Main ) , "cache_size" , & ( - i64 ::from ( cache_kib ) ) ) ? ;
}
Ok ( conn )
Ok ( conn )
}
}
@ -136,68 +64,52 @@ impl Pool {
self . writer . lock ( )
self . writer . lock ( )
}
}
fn read_lock ( & self ) -> HoldingConn < ' _ > {
fn read_lock ( & self ) -> & ' static Connection {
// First try to get a connection from the permanent pool
READ_CONNECTION . with ( | cell | {
for r in & self . readers {
let connection = & mut cell . borrow_mut ( ) ;
if let Some ( reader ) = r . try_lock ( ) {
return HoldingConn ::FromGuard ( reader ) ;
}
}
debug ! ( "read_lock: All permanent readers locked, obtaining spillover reader..." ) ;
if ( * connection ) . is_none ( ) {
let c = Box ::leak ( Box ::new (
// We didn't get a connection from the permanent pool, so we'll dumpster-dive for recycled connections.
Self ::prepare_conn ( & self . path , self . cache_size_per_thread ) . unwrap ( ) ,
// Either we have a connection or we dont, if we don't, we make a new one.
) ) ;
let conn = match self . spills . try_take ( ) {
* * connection = Some ( c ) ;
Some ( conn ) = > conn ,
None = > {
debug ! ( "read_lock: No recycled connections left, creating new one..." ) ;
Self ::prepare_conn ( & self . path , None ) . unwrap ( )
}
}
} ;
// Clone the spill Arc to mark how many spilled connections actually exist.
let spill_arc = Arc ::clone ( & self . spill_tracker ) ;
// Get a sense of how many connections exist now.
connection . unwrap ( )
let now_count = Arc ::strong_count ( & spill_arc ) - 1 /* because one is held by the pool */ ;
} )
// If the spillover readers are more than the number of total readers, there might be a problem.
if now_count > self . readers . len ( ) {
warn ! (
"Database is under high load. Consider increasing sqlite_read_pool_size ({} spillover readers exist)" ,
now_count
) ;
}
}
// Return the recyclable connection.
pub fn flush_wal ( self : & Arc < Self > ) -> Result < ( ) > {
HoldingConn ::FromRecycled ( self . spills . recycle ( conn ) , spill_arc )
self . write_lock ( )
. pragma_update ( Some ( Main ) , "wal_checkpoint" , & "TRUNCATE" ) ? ;
Ok ( ( ) )
}
}
}
}
/// SQLite-backed database engine: owns the connection pool plus a thread pool
/// used for running iterator queries in the background.
pub struct Engine {
/// Writer / reader connections used by every table operation.
pool : Pool ,
/// Worker threads that `iter_from_thread` schedules iterator queries on; it is
/// locked while the worker counts are inspected and a job is submitted.
iter_pool : Mutex < ThreadPool > ,
}
impl DatabaseEngine for Engine {
impl DatabaseEngine for Engine {
fn open ( config : & Config ) -> Result < Arc < Self > > {
fn open ( config : & Config ) -> Result < Arc < Self > > {
let pool = Pool ::new (
let path = Path ::new ( & config . database_path ) . join ( "conduit.db" ) ;
Path ::new ( & config . database_path ) . join ( "conduit.db" ) ,
config . sqlite_read_pool_size ,
// calculates cache-size per permanent connection
config . db_cache_capacity_mb ,
// 1. convert MB to KiB
) ? ;
// 2. divide by permanent connections
// 3. round down to nearest integer
let cache_size_per_thread : u32 =
( ( config . db_cache_capacity_mb * 1024.0 ) / ( num_cpus ::get ( ) . max ( 1 ) + 1 ) as f64 ) as u32 ;
let writer = Mutex ::new ( Self ::prepare_conn ( & path , cache_size_per_thread ) ? ) ;
let arc = Arc ::new ( Engine {
let arc = Arc ::new ( Engine {
pool ,
writer ,
iter_pool : Mutex ::new ( ThreadPool ::new ( 10 ) ) ,
path ,
cache_size_per_thread ,
} ) ;
} ) ;
Ok ( arc )
Ok ( arc )
}
}
fn open_tree ( self : & Arc < Self > , name : & str ) -> Result < Arc < dyn Tree > > {
fn open_tree ( self : & Arc < Self > , name : & str ) -> Result < Arc < dyn Tree > > {
self . pool . write_lock ( ) . execute ( & format! ( "CREATE TABLE IF NOT EXISTS {} ( \"key\" BLOB PRIMARY KEY, \"value\" BLOB NOT NULL )" , name ) , [ ] ) ? ;
self . write_lock ( ) . execute ( & format! ( "CREATE TABLE IF NOT EXISTS {} ( \"key\" BLOB PRIMARY KEY, \"value\" BLOB NOT NULL )" , name ) , [ ] ) ? ;
Ok ( Arc ::new ( SqliteTable {
Ok ( Arc ::new ( SqliteTable {
engine : Arc ::clone ( self ) ,
engine : Arc ::clone ( self ) ,
@ -212,31 +124,6 @@ impl DatabaseEngine for Engine {
}
}
}
}
impl Engine {
    /// Issues `wal_checkpoint = RESTART` on the writer connection.
    ///
    /// The writer lock is held only for the duration of the pragma call.
    ///
    /// # Errors
    ///
    /// Returns the error reported by the pragma update, if any.
    pub fn flush_wal(self: &Arc<Self>) -> Result<()> {
        let outcome = self
            .pool
            .write_lock()
            .pragma_update(Some(Main), "wal_checkpoint", &"RESTART");
        outcome?;
        Ok(())
    }

    /// Reaps (at most) `len * fraction` (rounded down, min 1) idle spillover
    /// connections.
    ///
    /// `fraction` is clamped to `0.01..=1.0`. Every connection taken out of the
    /// recycling queue is dropped immediately; the number actually taken is
    /// logged at debug level.
    pub fn reap_spillover_by_fraction(&self, fraction: f64) {
        let idle = self.pool.spills.1.len() as f64;
        let share = fraction.clamp(0.01, 1.0);
        let attempts = (idle * share).max(1.0) as u32;

        // Every attempt polls the queue once, whether or not it yields a connection.
        let reaped = (0..attempts)
            .filter(|_| self.pool.spills.try_take().is_some())
            .count();

        debug!("Reaped {} connections", reaped);
    }
}
pub struct SqliteTable {
pub struct SqliteTable {
engine : Arc < Engine > ,
engine : Arc < Engine > ,
name : String ,
name : String ,
@ -258,7 +145,7 @@ impl SqliteTable {
fn insert_with_guard ( & self , guard : & Connection , key : & [ u8 ] , value : & [ u8 ] ) -> Result < ( ) > {
fn insert_with_guard ( & self , guard : & Connection , key : & [ u8 ] , value : & [ u8 ] ) -> Result < ( ) > {
guard . execute (
guard . execute (
format! (
format! (
"INSERT INTO {} (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value " ,
"INSERT OR REPLACE INTO {} (key, value) VALUES (?, ?)" ,
self . name
self . name
)
)
. as_str ( ) ,
. as_str ( ) ,
@ -266,70 +153,17 @@ impl SqliteTable {
) ? ;
) ? ;
Ok ( ( ) )
Ok ( ( ) )
}
}
/// Runs `sql` on another thread and returns an iterator over the rows it sends back.
///
/// Rows travel through a bounded channel with capacity 5. The query is handed to
/// the engine's iterator thread pool while it has fewer active workers than its
/// maximum; otherwise a dedicated thread is spawned for it. `param`, when
/// present, is bound as the single query parameter.
#[tracing::instrument(skip(self, sql, param))]
fn iter_from_thread(
    &self,
    sql: String,
    param: Option<Vec<u8>>,
) -> Box<dyn Iterator<Item = TupleOfBytes> + Send + Sync> {
    let (row_tx, row_rx) = bounded::<TupleOfBytes>(5);
    let engine = Arc::clone(&self.engine);

    // The same job runs regardless of which kind of thread ends up executing it.
    let job = move || match param {
        Some(bound) => {
            iter_from_thread_work(&engine.pool.read_lock(), &row_tx, &sql, [bound]);
        }
        None => {
            iter_from_thread_work(&engine.pool.read_lock(), &row_tx, &sql, []);
        }
    };

    let workers = self.engine.iter_pool.lock();
    if workers.active_count() < workers.max_count() {
        workers.execute(job);
    } else {
        std::thread::spawn(job);
    }

    Box::new(row_rx.into_iter())
}
}
fn iter_from_thread_work < P > (
guard : & HoldingConn < ' _ > ,
s : & ChannelSender < ( Vec < u8 > , Vec < u8 > ) > ,
sql : & str ,
params : P ,
) where
P : Params ,
{
for bob in guard
. prepare ( sql )
. unwrap ( )
. query_map ( params , | row | Ok ( ( row . get_unwrap ( 0 ) , row . get_unwrap ( 1 ) ) ) )
. unwrap ( )
. map ( | r | r . unwrap ( ) )
{
if s . send ( bob ) . is_err ( ) {
return ;
}
}
}
}
impl Tree for SqliteTable {
impl Tree for SqliteTable {
#[ tracing::instrument(skip(self, key)) ]
#[ tracing::instrument(skip(self, key)) ]
fn get ( & self , key : & [ u8 ] ) -> Result < Option < Vec < u8 > > > {
fn get ( & self , key : & [ u8 ] ) -> Result < Option < Vec < u8 > > > {
self . get_with_guard ( & self . engine . pool . read_lock ( ) , key )
self . get_with_guard ( & self . engine . read_lock ( ) , key )
}
}
#[ tracing::instrument(skip(self, key, value)) ]
#[ tracing::instrument(skip(self, key, value)) ]
fn insert ( & self , key : & [ u8 ] , value : & [ u8 ] ) -> Result < ( ) > {
fn insert ( & self , key : & [ u8 ] , value : & [ u8 ] ) -> Result < ( ) > {
let guard = self . engine . pool . write_lock ( ) ;
let guard = self . engine . write_lock ( ) ;
let start = Instant ::now ( ) ;
let start = Instant ::now ( ) ;
@ -337,7 +171,7 @@ impl Tree for SqliteTable {
let elapsed = start . elapsed ( ) ;
let elapsed = start . elapsed ( ) ;
if elapsed > MILLI {
if elapsed > MILLI {
debug ! ( "insert: took {:012 ?} : {}" , elapsed , & self . name ) ;
warn ! ( "insert took {:?} : {}" , elapsed , & self . name ) ;
}
}
drop ( guard ) ;
drop ( guard ) ;
@ -369,7 +203,7 @@ impl Tree for SqliteTable {
#[ tracing::instrument(skip(self, key)) ]
#[ tracing::instrument(skip(self, key)) ]
fn remove ( & self , key : & [ u8 ] ) -> Result < ( ) > {
fn remove ( & self , key : & [ u8 ] ) -> Result < ( ) > {
let guard = self . engine . pool . write_lock ( ) ;
let guard = self . engine . write_lock ( ) ;
let start = Instant ::now ( ) ;
let start = Instant ::now ( ) ;
@ -389,9 +223,28 @@ impl Tree for SqliteTable {
}
}
#[ tracing::instrument(skip(self)) ]
#[ tracing::instrument(skip(self)) ]
fn iter < ' a > ( & ' a self ) -> Box < dyn Iterator < Item = TupleOfBytes > + Send + ' a > {
fn iter < ' a > ( & ' a self ) -> Box < dyn Iterator < Item = TupleOfBytes > + ' a > {
let name = self . name . clone ( ) ;
let guard = self . engine . read_lock ( ) ;
self . iter_from_thread ( format! ( "SELECT key, value FROM {}" , name ) , None )
let statement = Box ::leak ( Box ::new (
guard
. prepare ( & format! ( "SELECT key, value FROM {}" , & self . name ) )
. unwrap ( ) ,
) ) ;
let statement_ref = NonAliasingBox ( statement ) ;
let iterator = Box ::new (
statement
. query_map ( [ ] , | row | Ok ( ( row . get_unwrap ( 0 ) , row . get_unwrap ( 1 ) ) ) )
. unwrap ( )
. map ( | r | r . unwrap ( ) ) ,
) ;
Box ::new ( PreparedStatementIterator {
iterator ,
statement_ref ,
} )
}
}
#[ tracing::instrument(skip(self, from, backwards)) ]
#[ tracing::instrument(skip(self, from, backwards)) ]
@ -399,31 +252,61 @@ impl Tree for SqliteTable {
& ' a self ,
& ' a self ,
from : & [ u8 ] ,
from : & [ u8 ] ,
backwards : bool ,
backwards : bool ,
) -> Box < dyn Iterator < Item = TupleOfBytes > + Send + ' a > {
) -> Box < dyn Iterator < Item = TupleOfBytes > + ' a > {
let name = self . name . clone ( ) ;
let guard = self . engine . read_lock ( ) ;
let from = from . to_vec ( ) ; // TODO change interface?
let from = from . to_vec ( ) ; // TODO change interface?
if backwards {
if backwards {
self . iter_from_thread (
let statement = Box ::leak ( Box ::new (
format! (
guard
. prepare ( & format! (
"SELECT key, value FROM {} WHERE key <= ? ORDER BY key DESC" ,
"SELECT key, value FROM {} WHERE key <= ? ORDER BY key DESC" ,
name
& self . name
) ,
) )
Some ( from ) ,
. unwrap ( ) ,
)
) ) ;
let statement_ref = NonAliasingBox ( statement ) ;
let iterator = Box ::new (
statement
. query_map ( [ from ] , | row | Ok ( ( row . get_unwrap ( 0 ) , row . get_unwrap ( 1 ) ) ) )
. unwrap ( )
. map ( | r | r . unwrap ( ) ) ,
) ;
Box ::new ( PreparedStatementIterator {
iterator ,
statement_ref ,
} )
} else {
} else {
self . iter_from_thread (
let statement = Box ::leak ( Box ::new (
format! (
guard
. prepare ( & format! (
"SELECT key, value FROM {} WHERE key >= ? ORDER BY key ASC" ,
"SELECT key, value FROM {} WHERE key >= ? ORDER BY key ASC" ,
name
& self . name
) ,
) )
Some ( from ) ,
. unwrap ( ) ,
)
) ) ;
let statement_ref = NonAliasingBox ( statement ) ;
let iterator = Box ::new (
statement
. query_map ( [ from ] , | row | Ok ( ( row . get_unwrap ( 0 ) , row . get_unwrap ( 1 ) ) ) )
. unwrap ( )
. map ( | r | r . unwrap ( ) ) ,
) ;
Box ::new ( PreparedStatementIterator {
iterator ,
statement_ref ,
} )
}
}
}
}
#[ tracing::instrument(skip(self, key)) ]
#[ tracing::instrument(skip(self, key)) ]
fn increment ( & self , key : & [ u8 ] ) -> Result < Vec < u8 > > {
fn increment ( & self , key : & [ u8 ] ) -> Result < Vec < u8 > > {
let guard = self . engine . pool . write_lock ( ) ;
let guard = self . engine . write_lock ( ) ;
let start = Instant ::now ( ) ;
let start = Instant ::now ( ) ;
@ -445,10 +328,7 @@ impl Tree for SqliteTable {
}
}
#[ tracing::instrument(skip(self, prefix)) ]
#[ tracing::instrument(skip(self, prefix)) ]
fn scan_prefix < ' a > (
fn scan_prefix < ' a > ( & ' a self , prefix : Vec < u8 > ) -> Box < dyn Iterator < Item = TupleOfBytes > + ' a > {
& ' a self ,
prefix : Vec < u8 > ,
) -> Box < dyn Iterator < Item = TupleOfBytes > + Send + ' a > {
// let name = self.name.clone();
// let name = self.name.clone();
// self.iter_from_thread(
// self.iter_from_thread(
// format!(
// format!(
@ -483,25 +363,9 @@ impl Tree for SqliteTable {
fn clear ( & self ) -> Result < ( ) > {
fn clear ( & self ) -> Result < ( ) > {
debug ! ( "clear: running" ) ;
debug ! ( "clear: running" ) ;
self . engine
self . engine
. pool
. write_lock ( )
. write_lock ( )
. execute ( format! ( "DELETE FROM {}" , self . name ) . as_str ( ) , [ ] ) ? ;
. execute ( format! ( "DELETE FROM {}" , self . name ) . as_str ( ) , [ ] ) ? ;
debug ! ( "clear: ran" ) ;
debug ! ( "clear: ran" ) ;
Ok ( ( ) )
Ok ( ( ) )
}
}
}
}
// TODO
// struct Pool<const NUM_READERS: usize> {
// writer: Mutex<Connection>,
// readers: [Mutex<Connection>; NUM_READERS],
// }
// // then, to pick a reader:
// for r in &pool.readers {
// if let Ok(reader) = r.try_lock() {
// // use reader
// }
// }
// // none unlocked, pick the next reader
// pool.readers[pool.counter.fetch_add(1, Relaxed) % NUM_READERS].lock()