diff --git a/src/match_rating.rs b/src/match_rating.rs index 7b3a989..bc5020f 100644 --- a/src/match_rating.rs +++ b/src/match_rating.rs @@ -4,6 +4,7 @@ use unicode_segmentation::UnicodeSegmentation; pub fn match_rating_codex(s: &str) -> Result { // match rating only really makes sense on strings + let s = &s.to_uppercase()[..]; let v = UnicodeSegmentation::graphemes(s, true).collect::>(); let mut codex = String::new(); @@ -24,10 +25,12 @@ pub fn match_rating_codex(s: &str) -> Result { } if codex.len() > 6 { - let mut newcodex = String::new(); - newcodex.push_str(codex.get(..3).unwrap()); - newcodex.push_str(codex.get(codex.len() - 3..).unwrap()); - return Ok(newcodex); + // not safe to take a slice without conversion to chars() since there + // can be unicode left, this implementation matches the Python one + // even though MRC really shouldn't be used with unicode chars + let first_three: String = codex.chars().take(3).collect(); + let last_three: String = codex.chars().rev().take(3).collect::().chars().rev().collect(); + return Ok(first_three + &last_three); } Ok(codex) diff --git a/testdata b/testdata index ba7a0af..9eaccb5 160000 --- a/testdata +++ b/testdata @@ -1 +1 @@ -Subproject commit ba7a0afa6509361e55b0e40b49de5b51a9f0f075 +Subproject commit 9eaccb5222c68c043eaaaf471617a5aa530394a9