feat: Fix the ranking algorithm to sort only the needed documents

This commit is contained in:
Kerollmops
2018-06-25 22:26:49 +02:00
committed by Clément Renault
parent 0190caef4d
commit 23134fee02
7 changed files with 156 additions and 230 deletions

View File

@ -4,7 +4,7 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.66 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.68 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -29,7 +29,7 @@ dependencies = [
[[package]]
name = "group-by"
version = "0.1.0"
source = "git+https://github.com/Kerollmops/group-by.git#7e432aa232834b650ca85ecd46056a43a0094dec"
source = "git+https://github.com/Kerollmops/group-by.git#034fadc462dc511ed53f44f6091f8707a27ca392"
[[package]]
name = "itoa"
@ -82,8 +82,8 @@ dependencies = [
"fst 0.3.0 (git+https://github.com/Kerollmops/fst.git?branch=op-builder-with-state)",
"group-by 0.1.0 (git+https://github.com/Kerollmops/group-by.git)",
"levenshtein_automata 0.1.1 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=custom-fst)",
"serde 1.0.66 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.66 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.68 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.68 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -91,20 +91,20 @@ name = "raptor-indexer"
version = "0.1.0"
dependencies = [
"raptor 0.1.0",
"serde 1.0.66 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.66 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.68 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.68 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)",
"unidecode 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "serde"
version = "1.0.66"
version = "1.0.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "serde_derive"
version = "1.0.66"
version = "1.0.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
@ -119,7 +119,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
"itoa 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.66 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.68 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -173,8 +173,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff"
"checksum proc-macro2 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "effdb53b25cdad54f8f48843d67398f7ef2e14f12c1b4cb4effc549a6462a4d6"
"checksum quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e44651a0dc4cdd99f71c83b561e221f714912d11af1a4dff0631f923d53af035"
"checksum serde 1.0.66 (registry+https://github.com/rust-lang/crates.io-index)" = "e9a2d9a9ac5120e0f768801ca2b58ad6eec929dc9d1d616c162f208869c2ce95"
"checksum serde_derive 1.0.66 (registry+https://github.com/rust-lang/crates.io-index)" = "0a90213fa7e0f5eac3f7afe2d5ff6b088af515052cc7303bd68c7e3b91a3fb79"
"checksum serde 1.0.68 (registry+https://github.com/rust-lang/crates.io-index)" = "429fcc4efa8a11341b5422c2ace724daba276c1748467e869478f53c0ba4562e"
"checksum serde_derive 1.0.68 (registry+https://github.com/rust-lang/crates.io-index)" = "6a25ad0bf818ed2d180c89addbe29198d1de6c89ed08a48aa6a4d3d16a63cbfe"
"checksum serde_json 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)" = "84b8035cabe9b35878adec8ac5fe03d5f6bc97ff6edd7ccb96b44c1276ba390e"
"checksum syn 0.14.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c67da57e61ebc7b7b6fff56bb34440ca3a83db037320b0507af4c10368deda7d"
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"

View File

@ -31,6 +31,12 @@ where P: AsRef<Path>
fs::set_permissions(&path, perms)
}
/// Reports whether the file at `path` has its read-only permission flag set.
///
/// # Errors
///
/// Any error raised while querying the file's metadata (for example when the
/// file does not exist) is returned to the caller unchanged.
fn is_readonly<P>(path: P) -> io::Result<bool>
where P: AsRef<Path>
{
    let metadata = fs::metadata(path.as_ref())?;
    Ok(metadata.permissions().readonly())
}
fn main() {
let data = File::open("products.json_lines").unwrap();
let data = BufReader::new(data);
@ -54,6 +60,18 @@ fn main() {
}
};
let map_file = "map.fst";
let values_file = "values.vecs";
for file in &[map_file, values_file] {
match is_readonly(file) {
Ok(true) => panic!("the {:?} file is readonly, please make it writeable", file),
Err(ref e) if e.kind() == io::ErrorKind::NotFound => (),
Err(e) => panic!("{:?}", e),
_ => (),
}
}
let mut builder = DocIndexMapBuilder::new();
for line in data.lines() {
let line = line.unwrap();
@ -83,13 +101,13 @@ fn main() {
}
}
let map = File::create("map.fst").unwrap();
let values = File::create("values.vecs").unwrap();
let map = File::create(map_file).unwrap();
let values = File::create(values_file).unwrap();
let (map, values) = builder.build(map, values).unwrap();
set_readonly("map.fst", true).unwrap();
set_readonly("values.vecs", true).unwrap();
set_readonly(map_file, true).unwrap();
set_readonly(values_file, true).unwrap();
println!("Checking the dump consistency...");
unsafe { DocIndexMap::from_paths("map.fst", "values.vecs").unwrap() };