mirror of
https://github.com/sirjonasxx/G-Earth.git
synced 2024-11-23 08:50:52 +01:00
utf-8 string prediction
This commit is contained in:
parent
fbef4ad4ab
commit
e4c5a941ee
@ -64,13 +64,29 @@ public class StringChecker extends TypeChecker<String> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
for (int i = 0; i < s.length(); i++) {
|
for (int i = 0; i < s.length(); i++) {
|
||||||
score *= penalties[isCommon(
|
|
||||||
asChars[i],
|
|
||||||
asBytes[i]
|
|
||||||
)];
|
|
||||||
|
|
||||||
if (score < 0.001) {
|
// detect UTF8 extended chars
|
||||||
return 0;
|
if ((asBytes[i] & 0b11100000) == 0b11000000 && i < s.length() - 1 && (asBytes[i+1] & 0b11000000) == 0b10000000) {
|
||||||
|
i += 1;
|
||||||
|
score *= penalties[2]*penalties[2];
|
||||||
|
}
|
||||||
|
else if ((asBytes[i] & 0b11110000) == 0b11100000 && i < s.length() - 2 && (asBytes[i+1] & 0b11000000) == 0b10000000 && (asBytes[i+2] & 0b11000000) == 0b10000000) {
|
||||||
|
i += 2;
|
||||||
|
score *= penalties[2]*penalties[2]*penalties[2];
|
||||||
|
}
|
||||||
|
else if ((asBytes[i] & 0b11111000) == 0b11110000 && i < s.length() - 3 && (asBytes[i+1] & 0b11000000) == 0b10000000 && (asBytes[i+2] & 0b11000000) == 0b10000000 && (asBytes[i+3] & 0b11000000) == 0b10000000) {
|
||||||
|
i += 3;
|
||||||
|
score *= penalties[2]*penalties[2]*penalties[2]*penalties[2];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
score *= penalties[isCommon(
|
||||||
|
asChars[i],
|
||||||
|
asBytes[i]
|
||||||
|
)];
|
||||||
|
|
||||||
|
if (score < 0.001) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user