Introduce a better query and document lexer

This commit is contained in:
Clément Renault
2020-08-15 20:37:13 +02:00
parent 1e358e3ae8
commit 8806fcd545
7 changed files with 117 additions and 13 deletions

View File

@ -1,4 +1,5 @@
use std::{mem, str};
use unicode_linebreak::{break_property, BreakClass};
use QueryToken::{Quoted, Free};
@ -8,6 +9,7 @@ pub enum QueryToken<'a> {
Quoted(&'a str),
}
#[derive(Debug)]
enum State {
Free(usize),
Quoted(usize),
@ -67,8 +69,13 @@ impl<'a> Iterator for QueryTokens<'a> {
},
State::Fused => return None,
}
}
else if !self.state.is_quoted() && !c.is_alphanumeric() {
} else if break_property(c as u32) == BreakClass::Ideographic {
match self.state.replace_by(State::Free(afteri)) {
State::Quoted(s) => return Some(Quoted(&self.string[s..afteri])),
State::Free(s) => return Some(Free(&self.string[s..afteri])),
_ => self.state = State::Free(afteri),
}
} else if !self.state.is_quoted() && !c.is_alphanumeric() {
match self.state.replace_by(State::Free(afteri)) {
State::Free(s) if i > s => return Some(Free(&self.string[s..i])),
_ => self.state = State::Free(afteri),
@ -83,6 +90,15 @@ mod tests {
use super::*;
use QueryToken::{Quoted, Free};
#[test]
fn empty() {
    // Neither an empty query nor a whitespace-only query may yield a token:
    // the very first call to `next` must already report exhaustion.
    for query in ["", " "].iter().copied() {
        assert_eq!(QueryTokens::new(query).next(), None);
    }
}
#[test]
fn one_quoted_string() {
let mut iter = QueryTokens::new("\"hello\"");
@ -154,4 +170,14 @@ mod tests {
assert_eq!(iter.next(), Some(Quoted("monde est beau")));
assert_eq!(iter.next(), None);
}
/// Ideographic characters carry no whitespace between words, so the lexer
/// emits every character whose line-break class is `Ideographic` as its own
/// free token.
#[test]
fn chinese() {
    let mut iter = QueryTokens::new("汽车男生");
    // Each token is the slice ending right after the current ideograph, so it
    // always holds exactly that one character and can never be empty.
    assert_eq!(iter.next(), Some(Free("汽")));
    assert_eq!(iter.next(), Some(Free("车")));
    assert_eq!(iter.next(), Some(Free("男")));
    assert_eq!(iter.next(), Some(Free("生")));
    assert_eq!(iter.next(), None);
}
}