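Ever needed to count the Chinese characters in a string? Here’s a quick hack that works for most Chinese characters (it matches the Unicode range U+4E00–U+9FFF, the basic CJK Unified Ideographs block, so rarer characters outside that range are not counted):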
import re
# Sample sentence mixing Chinese characters with punctuation marks.
text = "你好,请问:地铁站在哪里?"
def count_chinese_characters(text: str) -> int:
    """Count the Chinese characters in ``text``.

    A character is counted when its code point lies in the range
    U+4E00-U+9FFF (the basic CJK Unified Ideographs block). Anything
    outside that range is ignored: punctuation, Latin letters, digits,
    and rarer ideographs that live in other blocks (for example
    U+3400-U+4DBF).

    Args:
        text: The string to scan.

    Returns:
        The number of characters in ``text`` within U+4E00-U+9FFF.
    """
    # Character class covering U+4E00 through U+9FFF inclusive.
    chinese_char_pattern = r'[\u4e00-\u9fff]'
    # Only the count is needed, so stream the matches instead of
    # materializing a list of every matched character.
    return sum(1 for _ in re.finditer(chinese_char_pattern, text))
# Run the counter on the sample text and print the result.
count = count_chinese_characters(text)
print(f"Number of Chinese characters in the text: {count}")
For a version adapted to run in a web browser (using Gradio), look here.