Skip to content

Commit

Permalink
Added UTF-8 validation function
Browse files Browse the repository at this point in the history
  • Loading branch information
R-HYTE committed May 23, 2024
1 parent b3350a8 commit f15a375
Showing 1 changed file with 52 additions and 0 deletions.
52 changes: 52 additions & 0 deletions 0x04-utf8_validation/0-validate_utf8.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/python3
"""
This module contains a function to validate if a given data set represents
a valid UTF-8 encoding. The data set is provided as a list of integers,
where each integer represents a byte (0 <= x <= 255). The function
validUTF8(data) returns True if the data is a valid UTF-8 encoding,
else returns False.
"""


def validUTF8(data):
    """
    Determines if a given data set represents a valid UTF-8 encoding.

    Only the 8 least significant bits of each integer are examined, so a
    value such as 467 is treated as the byte 0xD3.

    Besides checking the lead/continuation bit patterns, the sequence is
    validated against the well-formed byte ranges of RFC 3629: overlong
    encodings, UTF-16 surrogates (U+D800..U+DFFF) and code points above
    U+10FFFF are rejected.

    Parameters:
        data (list): A list of integers where each integer represents
            a byte (0 <= x <= 255).
    Returns:
        bool: True if data is a valid UTF-8 encoding, else False.
    """
    remaining = 0            # continuation bytes still expected
    low, high = 0x80, 0xBF   # allowed range for the next continuation byte

    for value in data:
        byte = value & 0xFF

        if remaining:
            if not low <= byte <= high:
                return False
            remaining -= 1
            # Only the first continuation byte has a narrowed range.
            low, high = 0x80, 0xBF
            continue

        if byte < 0x80:
            # Single-byte (ASCII) character.
            continue

        if 0xC2 <= byte <= 0xDF:
            # 0xC0 and 0xC1 would only encode overlong ASCII, so the
            # two-byte range starts at 0xC2.
            remaining = 1
        elif 0xE0 <= byte <= 0xEF:
            remaining = 2
            if byte == 0xE0:
                low = 0xA0       # below this is an overlong encoding
            elif byte == 0xED:
                high = 0x9F      # above this is a UTF-16 surrogate
        elif 0xF0 <= byte <= 0xF4:
            remaining = 3
            if byte == 0xF0:
                low = 0x90       # below this is an overlong encoding
            elif byte == 0xF4:
                high = 0x8F      # above this exceeds U+10FFFF
        else:
            # Stray continuation byte (0x80-0xBF), overlong lead
            # (0xC0/0xC1), or a lead byte that UTF-8 never uses
            # (0xF5-0xFF).
            return False

    # A trailing, unfinished multi-byte character is invalid.
    return remaining == 0

0 comments on commit f15a375

Please sign in to comment.