-
Notifications
You must be signed in to change notification settings - Fork 1
/
detection.rs
52 lines (51 loc) · 1.53 KB
/
detection.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
use crate::util::{is_cyrillic, is_katakana};
use crate::Script;
/// Detects the script type of a given Ainu language string.
///
/// Only alphabetic characters are considered; digits, whitespace and punctuation are ignored.
/// Each alphabetic character is tested for three scripts: Latin (ASCII letters only),
/// Cyrillic, and Katakana. Hangul detection is not currently enabled, so Hangul text is
/// not reported as a script of its own.
///
/// # Arguments
///
/// * `s` - The text string to be analyzed for script type.
///
/// # Returns
///
/// * `Script` - The detected script type:
///     * `Kana` if Katakana is the only script detected
///     * `Cyrl` if Cyrillic is the only script detected
///     * `Latn` if Latin (ASCII letters) is the only script detected
///     * `Mixed` if more than one of the above scripts is detected
///     * `Unknown` if none of the above scripts is detected (including the empty string)
///
/// # Example
///
/// ```
/// use ainconv::{detect, Script};
/// let script = detect("アイヌ");
/// assert_eq!(script, Script::Kana);
/// ```
pub fn detect(s: &str) -> Script {
    let mut has_latin = false;
    let mut has_cyrillic = false;
    let mut has_kana = false;

    // Single pass over the alphabetic characters; every flag is updated independently so the
    // result matches testing each script separately over the whole string.
    for c in s.chars().filter(|c| c.is_alphabetic()) {
        has_latin |= c.is_ascii();
        has_cyrillic |= is_cyrillic(c);
        has_kana |= is_katakana(c);

        // As soon as two different scripts have been seen the answer cannot change,
        // so stop scanning.
        let seen = usize::from(has_latin) + usize::from(has_cyrillic) + usize::from(has_kana);
        if seen > 1 {
            return Script::Mixed;
        }
    }

    // At most one flag is set at this point.
    if has_kana {
        Script::Kana
    } else if has_cyrillic {
        Script::Cyrl
    } else if has_latin {
        Script::Latn
    } else {
        Script::Unknown
    }
}