mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 13:36:27 +00:00 
			
		
		
		
	feat(search): Accept multiple words and do a simple union
This commit is contained in:
		
				
					committed by
					
						 Clément Renault
						Clément Renault
					
				
			
			
				
	
			
			
			
						parent
						
							758baeb8e1
						
					
				
				
					commit
					1476aa3dba
				
			
							
								
								
									
										10
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										10
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -85,7 +85,6 @@ dependencies = [ | |||||||
| [[package]] | [[package]] | ||||||
| name = "fst" | name = "fst" | ||||||
| version = "0.3.0" | version = "0.3.0" | ||||||
| source = "git+https://github.com/Kerollmops/fst.git?branch=stream-with-state#a969462433944a22f1356a8bf2affb8e9bde6f67" |  | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "byteorder 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)", |  "byteorder 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)", | ||||||
|  "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", |  "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", | ||||||
| @@ -161,9 +160,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||||||
| [[package]] | [[package]] | ||||||
| name = "levenshtein_automata" | name = "levenshtein_automata" | ||||||
| version = "0.1.0" | version = "0.1.0" | ||||||
| source = "git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst#13a685e087efcf253936342c055166fa5d5c9b9c" |  | ||||||
| dependencies = [ | dependencies = [ | ||||||
|  "fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=stream-with-state)", |  "fst 0.3.0", | ||||||
| ] | ] | ||||||
|  |  | ||||||
| [[package]] | [[package]] | ||||||
| @@ -305,9 +303,9 @@ version = "0.1.0" | |||||||
| dependencies = [ | dependencies = [ | ||||||
|  "bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", |  "bincode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", | ||||||
|  "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", |  "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", | ||||||
|  "fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=stream-with-state)", |  "fst 0.3.0", | ||||||
|  "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", |  "futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)", | ||||||
|  "levenshtein_automata 0.1.0 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)", |  "levenshtein_automata 0.1.0", | ||||||
|  "serde 1.0.45 (registry+https://github.com/rust-lang/crates.io-index)", |  "serde 1.0.45 (registry+https://github.com/rust-lang/crates.io-index)", | ||||||
|  "serde_derive 1.0.45 (registry+https://github.com/rust-lang/crates.io-index)", |  "serde_derive 1.0.45 (registry+https://github.com/rust-lang/crates.io-index)", | ||||||
|  "serde_json 1.0.17 (registry+https://github.com/rust-lang/crates.io-index)", |  "serde_json 1.0.17 (registry+https://github.com/rust-lang/crates.io-index)", | ||||||
| @@ -651,7 +649,6 @@ dependencies = [ | |||||||
| "checksum crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d636a8b3bcc1b409d7ffd3facef8f21dcb4009626adbd0c5e6c4305c07253c7b" | "checksum crossbeam-utils 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d636a8b3bcc1b409d7ffd3facef8f21dcb4009626adbd0c5e6c4305c07253c7b" | ||||||
| "checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab" | "checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab" | ||||||
| "checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f" | "checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f" | ||||||
| "checksum fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=stream-with-state)" = "<none>" |  | ||||||
| "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" | "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" | ||||||
| "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" | "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" | ||||||
| "checksum futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)" = "1a70b146671de62ec8c8ed572219ca5d594d9b06c0b364d5e67b722fc559b48c" | "checksum futures 0.1.21 (registry+https://github.com/rust-lang/crates.io-index)" = "1a70b146671de62ec8c8ed572219ca5d594d9b06c0b364d5e67b722fc559b48c" | ||||||
| @@ -662,7 +659,6 @@ dependencies = [ | |||||||
| "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" | "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" | ||||||
| "checksum lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c8f31047daa365f19be14b47c29df4f7c3b581832407daabe6ae77397619237d" | "checksum lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c8f31047daa365f19be14b47c29df4f7c3b581832407daabe6ae77397619237d" | ||||||
| "checksum lazycell 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a6f08839bc70ef4a3fe1d566d5350f519c5912ea86be0df1740a7d247c7fc0ef" | "checksum lazycell 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a6f08839bc70ef4a3fe1d566d5350f519c5912ea86be0df1740a7d247c7fc0ef" | ||||||
| "checksum levenshtein_automata 0.1.0 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)" = "<none>" |  | ||||||
| "checksum libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)" = "6fd41f331ac7c5b8ac259b8bf82c75c0fb2e469bbf37d2becbba9a6a2221965b" | "checksum libc 0.2.40 (registry+https://github.com/rust-lang/crates.io-index)" = "6fd41f331ac7c5b8ac259b8bf82c75c0fb2e469bbf37d2becbba9a6a2221965b" | ||||||
| "checksum log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b" | "checksum log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b" | ||||||
| "checksum log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "89f010e843f2b1a31dbd316b3b8d443758bc634bed37aabade59c686d644e0a2" | "checksum log 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "89f010e843f2b1a31dbd316b3b8d443758bc634bed37aabade59c686d644e0a2" | ||||||
|   | |||||||
							
								
								
									
										14
									
								
								Cargo.toml
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								Cargo.toml
									
									
									
									
									
								
							| @@ -16,13 +16,15 @@ tokio-service = "0.1" | |||||||
| url = "1.7" | url = "1.7" | ||||||
|  |  | ||||||
| [dependencies.fst] | [dependencies.fst] | ||||||
| git = "https://github.com/Kerollmops/fst.git" | path = "../../fst" | ||||||
| branch = "stream-with-state" | # git = "https://github.com/Kerollmops/fst.git" | ||||||
|  | # branch = "stream-with-state" | ||||||
|  |  | ||||||
| [dependencies.levenshtein_automata] | [dependencies.levenshtein_automata] | ||||||
| git = "https://github.com/Kerollmops/levenshtein-automata.git" | path = "../../levenshtein-automata" | ||||||
| branch = "custom-fst" | # git = "https://github.com/Kerollmops/levenshtein-automata.git" | ||||||
|  | # branch = "custom-fst" | ||||||
| features = ["fst_automaton"] | features = ["fst_automaton"] | ||||||
|  |  | ||||||
| [profile.release] | # [profile.release] | ||||||
| lto = true | # lto = true | ||||||
|   | |||||||
| @@ -13,14 +13,14 @@ use std::path::Path; | |||||||
| use std::fs::File; | use std::fs::File; | ||||||
| use std::io::{Read, BufReader}; | use std::io::{Read, BufReader}; | ||||||
|  |  | ||||||
| use fst::{IntoStreamer, Streamer}; | use fst::Streamer; | ||||||
| use futures::future; | use futures::future; | ||||||
| use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder; | use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder; | ||||||
| use tokio_minihttp::{Request, Response, Http}; | use tokio_minihttp::{Request, Response, Http}; | ||||||
| use tokio_proto::TcpServer; | use tokio_proto::TcpServer; | ||||||
| use tokio_service::Service; | use tokio_service::Service; | ||||||
|  |  | ||||||
| use raptor::FstMap; | use raptor::{FstMap, OpWithStateBuilder}; | ||||||
|  |  | ||||||
| static mut MAP: Option<FstMap<u64>> = None; | static mut MAP: Option<FstMap<u64>> = None; | ||||||
| static mut LEV_BUILDER_0: Option<LevBuilder> = None; | static mut LEV_BUILDER_0: Option<LevBuilder> = None; | ||||||
| @@ -52,6 +52,9 @@ impl<'a> Service for MainService<'a> { | |||||||
|         if let Some((_, query)) = url.query_pairs().find(|&(ref k, _)| k == "q") { |         if let Some((_, query)) = url.query_pairs().find(|&(ref k, _)| k == "q") { | ||||||
|             let query = query.to_lowercase(); |             let query = query.to_lowercase(); | ||||||
|  |  | ||||||
|  |             let mut automatons = Vec::new(); | ||||||
|  |  | ||||||
|  |             for query in query.split_whitespace() { | ||||||
|                 let lev = if query.len() <= 4 { |                 let lev = if query.len() <= 4 { | ||||||
|                     self.lev_builder_0.build_dfa(&query) |                     self.lev_builder_0.build_dfa(&query) | ||||||
|                 } else if query.len() <= 8 { |                 } else if query.len() <= 8 { | ||||||
| @@ -59,18 +62,30 @@ impl<'a> Service for MainService<'a> { | |||||||
|                 } else { |                 } else { | ||||||
|                     self.lev_builder_2.build_dfa(&query) |                     self.lev_builder_2.build_dfa(&query) | ||||||
|                 }; |                 }; | ||||||
|  |                 automatons.push(lev); | ||||||
|  |             } | ||||||
|  |  | ||||||
|             let mut stream = self.map.search(&lev).with_state().into_stream(); |             let mut op = OpWithStateBuilder::new(self.map.values()); | ||||||
|  |  | ||||||
|  |             for automaton in automatons.iter().cloned() { | ||||||
|  |                 let stream = self.map.as_map().search(automaton).with_state(); | ||||||
|  |                 op.push(stream); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             let mut stream = op.union(); | ||||||
|  |  | ||||||
|             let mut body = String::new(); |             let mut body = String::new(); | ||||||
|             body.push_str("<html><body>"); |             body.push_str("<html><body>"); | ||||||
|  |  | ||||||
|             while let Some((key, values, state)) = stream.next() { |             while let Some((key, ivalues)) = stream.next() { | ||||||
|                 match std::str::from_utf8(key) { |                 match std::str::from_utf8(key) { | ||||||
|                     Ok(key) => { |                     Ok(key) => { | ||||||
|                         let values = &values[..values.len().min(10)]; |                         for ivalue in ivalues { | ||||||
|                         let distance = lev.distance(state); |                             let i = ivalue.index; | ||||||
|                         body.push_str(&format!("<p>{:?} (dist: {:?}) {:?}</p>", key, distance, values)); |                             let state = ivalue.state; | ||||||
|  |                             let distance = automatons[i].distance(state); | ||||||
|  |                             body.push_str(&format!("<p>{:?} (dist: {:?}) {:?}</p>", key, distance, ivalue.values)); | ||||||
|  |                         } | ||||||
|                     }, |                     }, | ||||||
|                     Err(e) => eprintln!("{:?}", e), |                     Err(e) => eprintln!("{:?}", e), | ||||||
|                 } |                 } | ||||||
|   | |||||||
							
								
								
									
										170
									
								
								src/fst_map.rs
									
									
									
									
									
								
							
							
						
						
									
										170
									
								
								src/fst_map.rs
									
									
									
									
									
								
							| @@ -1,5 +1,5 @@ | |||||||
| use bincode; | use bincode; | ||||||
| use fst::{self, Map, MapBuilder, Automaton}; | use fst::{self, Automaton}; | ||||||
| use serde::de::DeserializeOwned; | use serde::de::DeserializeOwned; | ||||||
| use serde::ser::Serialize; | use serde::ser::Serialize; | ||||||
| use std::fs::File; | use std::fs::File; | ||||||
| @@ -10,7 +10,7 @@ use {StreamBuilder, Stream}; | |||||||
|  |  | ||||||
| #[derive(Debug)] | #[derive(Debug)] | ||||||
| pub struct FstMap<T> { | pub struct FstMap<T> { | ||||||
|     inner: Map, |     inner: fst::Map, | ||||||
|     values: Values<T>, |     values: Values<T>, | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -21,7 +21,7 @@ impl<T> FstMap<T> { | |||||||
|         P: AsRef<Path>, |         P: AsRef<Path>, | ||||||
|         Q: AsRef<Path> |         Q: AsRef<Path> | ||||||
|     { |     { | ||||||
|         let inner = Map::from_path(map)?; |         let inner = fst::Map::from_path(map)?; | ||||||
|  |  | ||||||
|         // TODO handle errors !!! |         // TODO handle errors !!! | ||||||
|         let values = File::open(values).unwrap(); |         let values = File::open(values).unwrap(); | ||||||
| @@ -35,7 +35,7 @@ impl<T> FstMap<T> { | |||||||
|     where |     where | ||||||
|         T: DeserializeOwned |         T: DeserializeOwned | ||||||
|     { |     { | ||||||
|         let inner = Map::from_bytes(map)?; |         let inner = fst::Map::from_bytes(map)?; | ||||||
|         let values = bincode::deserialize(values).unwrap(); |         let values = bincode::deserialize(values).unwrap(); | ||||||
|  |  | ||||||
|         Ok(Self { inner, values }) |         Ok(Self { inner, values }) | ||||||
| @@ -62,6 +62,19 @@ impl<T> FstMap<T> { | |||||||
|             values: &self.values, |             values: &self.values, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn op(&self) -> OpBuilder<T> { | ||||||
|  |         // OpBuilder::new(&self.values).add(self.as_inner()) | ||||||
|  |         unimplemented!() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn as_map(&self) -> &fst::Map { | ||||||
|  |         &self.inner | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn values(&self) -> &Values<T> { | ||||||
|  |         &self.values | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Serialize, Deserialize)] | #[derive(Debug, Serialize, Deserialize)] | ||||||
| @@ -137,7 +150,7 @@ impl<T> FstMapBuilder<T> { | |||||||
|  |  | ||||||
|     pub fn build_memory(self) -> fst::Result<FstMap<T>> { |     pub fn build_memory(self) -> fst::Result<FstMap<T>> { | ||||||
|         Ok(FstMap { |         Ok(FstMap { | ||||||
|             inner: Map::from_iter(self.map)?, |             inner: fst::Map::from_iter(self.map)?, | ||||||
|             values: Values::new(self.values), |             values: Values::new(self.values), | ||||||
|         }) |         }) | ||||||
|     } |     } | ||||||
| @@ -148,7 +161,7 @@ impl<T> FstMapBuilder<T> { | |||||||
|         W: Write, |         W: Write, | ||||||
|         X: Write |         X: Write | ||||||
|     { |     { | ||||||
|         let mut builder = MapBuilder::new(map_wrt)?; |         let mut builder = fst::MapBuilder::new(map_wrt)?; | ||||||
|         builder.extend_iter(self.map)?; |         builder.extend_iter(self.map)?; | ||||||
|         let map = builder.into_inner()?; |         let map = builder.into_inner()?; | ||||||
|         let values = Values::new(self.values); |         let values = Values::new(self.values); | ||||||
| @@ -159,3 +172,148 @@ impl<T> FstMapBuilder<T> { | |||||||
|         Ok((map, values_wrt)) |         Ok((map, values_wrt)) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | pub struct OpBuilder<'m, 'v, T: 'v> { | ||||||
|  |     inner: fst::map::OpBuilder<'m>, | ||||||
|  |     values: &'v Values<T>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'m, 'v, T: 'v> OpBuilder<'m, 'v, T> { | ||||||
|  |     pub fn new(values: &'v Values<T>) -> Self { | ||||||
|  |         OpBuilder { | ||||||
|  |             inner: fst::map::OpBuilder::new(), | ||||||
|  |             values: values, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn add<I, S>(mut self, streamable: I) -> Self | ||||||
|  |     where | ||||||
|  |         I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64)>, | ||||||
|  |         S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64)>, | ||||||
|  |     { | ||||||
|  |         self.push(streamable); | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn push<I, S>(&mut self, streamable: I) | ||||||
|  |     where | ||||||
|  |         I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64)>, | ||||||
|  |         S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64)>, | ||||||
|  |     { | ||||||
|  |         self.inner.push(streamable); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn union(self) -> Union<'m, 'v, T> { | ||||||
|  |         Union { | ||||||
|  |             inner: self.inner.union(), | ||||||
|  |             outs: Vec::new(), | ||||||
|  |             values: self.values, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub struct Union<'m, 'v, T: 'v> { | ||||||
|  |     inner: fst::map::Union<'m>, | ||||||
|  |     outs: Vec<IndexedValues<'v, T>>, | ||||||
|  |     values: &'v Values<T>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'a, 'm, 'v, T: 'v + 'a> fst::Streamer<'a> for Union<'m, 'v, T> { | ||||||
|  |     type Item = (&'a [u8], &'a [IndexedValues<'a, T>]); | ||||||
|  |  | ||||||
|  |     fn next(&'a mut self) -> Option<Self::Item> { | ||||||
|  |         match self.inner.next() { | ||||||
|  |             Some((s, ivalues)) => { | ||||||
|  |                 self.outs.clear(); | ||||||
|  |                 for ivalue in ivalues { | ||||||
|  |                     let index = ivalue.index; | ||||||
|  |                     let values = unsafe { self.values.get_unchecked(ivalue.value as usize) }; | ||||||
|  |                     self.outs.push(IndexedValues { index, values }) | ||||||
|  |                 } | ||||||
|  |                 Some((s, &self.outs)) | ||||||
|  |             }, | ||||||
|  |             None => None, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Debug)] | ||||||
|  | pub struct IndexedValues<'a, T: 'a> { | ||||||
|  |     pub index: usize, | ||||||
|  |     pub values: &'a [T], | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub struct OpWithStateBuilder<'m, 'v, T: 'v, U> { | ||||||
|  |     inner: fst::map::OpWithStateBuilder<'m, U>, | ||||||
|  |     values: &'v Values<T>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'m, 'v, T: 'v, U: 'static> OpWithStateBuilder<'m, 'v, T, U> { | ||||||
|  |     pub fn new(values: &'v Values<T>) -> Self { | ||||||
|  |         Self { | ||||||
|  |             inner: fst::map::OpWithStateBuilder::new(), | ||||||
|  |             values: values, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn add<I, S>(mut self, streamable: I) -> Self | ||||||
|  |     where | ||||||
|  |         I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>, | ||||||
|  |         S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>, | ||||||
|  |     { | ||||||
|  |         self.push(streamable); | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn push<I, S>(&mut self, streamable: I) | ||||||
|  |     where | ||||||
|  |         I: for<'a> fst::IntoStreamer<'a, Into=S, Item=(&'a [u8], u64, U)>, | ||||||
|  |         S: 'm + for<'a> fst::Streamer<'a, Item=(&'a [u8], u64, U)>, | ||||||
|  |     { | ||||||
|  |         self.inner.push(streamable); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn union(self) -> UnionWithState<'m, 'v, T, U> { | ||||||
|  |         UnionWithState { | ||||||
|  |             inner: self.inner.union(), | ||||||
|  |             outs: Vec::new(), | ||||||
|  |             values: self.values, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | pub struct UnionWithState<'m, 'v, T: 'v, U> { | ||||||
|  |     inner: fst::map::UnionWithState<'m, U>, | ||||||
|  |     outs: Vec<IndexedValuesWithState<'v, T, U>>, | ||||||
|  |     values: &'v Values<T>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'a, 'm, 'v, T: 'v + 'a, U: 'a> fst::Streamer<'a> for UnionWithState<'m, 'v, T, U> | ||||||
|  | where | ||||||
|  |     U: Clone, | ||||||
|  | { | ||||||
|  |     type Item = (&'a [u8], &'a [IndexedValuesWithState<'a, T, U>]); | ||||||
|  |  | ||||||
|  |     fn next(&'a mut self) -> Option<Self::Item> { | ||||||
|  |         match self.inner.next() { | ||||||
|  |             Some((s, ivalues)) => { | ||||||
|  |                 self.outs.clear(); | ||||||
|  |                 for ivalue in ivalues { | ||||||
|  |                     let index = ivalue.index; | ||||||
|  |                     let values = unsafe { self.values.get_unchecked(ivalue.value as usize) }; | ||||||
|  |                     let state = ivalue.state.clone(); | ||||||
|  |                     self.outs.push(IndexedValuesWithState { index, values, state }) | ||||||
|  |                 } | ||||||
|  |                 Some((s, &self.outs)) | ||||||
|  |             }, | ||||||
|  |             None => None, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Debug)] | ||||||
|  | pub struct IndexedValuesWithState<'a, T: 'a, U> { | ||||||
|  |     pub index: usize, | ||||||
|  |     pub values: &'a [T], | ||||||
|  |     pub state: U, | ||||||
|  | } | ||||||
|   | |||||||
| @@ -5,13 +5,13 @@ extern crate serde; | |||||||
|  |  | ||||||
| mod fst_map; | mod fst_map; | ||||||
|  |  | ||||||
| use std::ops::Range; |  | ||||||
| use std::io::{Write, BufReader}; |  | ||||||
| use std::fs::File; |  | ||||||
| use std::path::Path; |  | ||||||
| use fst::Automaton; | use fst::Automaton; | ||||||
|  |  | ||||||
| pub use self::fst_map::{FstMap, FstMapBuilder}; | pub use self::fst_map::{FstMap, FstMapBuilder}; | ||||||
|  | pub use self::fst_map::{ | ||||||
|  |     OpBuilder, IndexedValues, | ||||||
|  |     OpWithStateBuilder, IndexedValuesWithState, | ||||||
|  | }; | ||||||
| use self::fst_map::Values; | use self::fst_map::Values; | ||||||
|  |  | ||||||
| pub struct StreamBuilder<'m, 'v, T: 'v, A> { | pub struct StreamBuilder<'m, 'v, T: 'v, A> { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user