parent 6a8d0c3175
commit c77241cb0f
@@ -0,0 +1,99 @@
# Copyright 2014 Bernard Yue
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

__doc__ = """
Hanzi Converter 繁簡轉換器 | 繁简转换器
This module provides functions for converting Chinese text between simplified
and traditional characters. It returns a unicode representation of the text.
Class HanziConv is the main entry point of the module; you can import the
class by doing:
>>> from hanziconv import HanziConv
"""

import os
from zhon import cedict

class HanziConv():
    """This class supports hanzi (漢字) conversion between simplified and
    traditional format"""
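    # The conversion below treats these two cedict strings as parallel
    # (index-aligned) character maps: a character found at position i in one
    # is replaced by the character at position i in the other.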
    __traditional_charmap = cedict.traditional
    __simplified_charmap = cedict.simplified

    @classmethod
    def __convert(cls, text, toTraditional=True):
        """Convert `text` to traditional characters if `toTraditional` is
        True, else convert to simplified characters
        :param text: data to convert
        :param toTraditional: True -- convert to traditional text
                              False -- convert to simplified text
        :returns: converted `text`
        """
        if isinstance(text, bytes):
            text = text.decode('utf-8')

        fromMap = cls.__simplified_charmap
        toMap = cls.__traditional_charmap
        if not toTraditional:
            fromMap = cls.__traditional_charmap
            toMap = cls.__simplified_charmap

        final = []
        # Map each character via its index in fromMap; characters not present
        # in fromMap (e.g. punctuation or ASCII) are kept as-is.
        for c in text:
            index = fromMap.find(c)
            if index != -1:
                final.append(toMap[index])
            else:
                final.append(c)
        return ''.join(final)

    @classmethod
    def toSimplified(cls, text):
        """Convert `text` to a simplified character string, assuming `text`
        is a traditional character string
        :param text: text to convert
        :returns: converted unicode characters
        >>> from hanziconv import HanziConv
        >>> print(HanziConv.toSimplified('繁簡轉換器'))
        繁简转换器
        """
        return cls.__convert(text, toTraditional=False)

    @classmethod
    def toTraditional(cls, text):
        """Convert `text` to a traditional character string, assuming `text`
        is a simplified character string
        :param text: text to convert
        :returns: converted unicode characters
        >>> from hanziconv import HanziConv
        >>> print(HanziConv.toTraditional('繁简转换器'))
        繁簡轉換器
        """
        return cls.__convert(text, toTraditional=True)

    @classmethod
    def same(cls, text1, text2):
        """Return True if `text1` and `text2` mean literally the same, False
        otherwise
        :param text1: string to compare to ``text2``
        :param text2: string to compare to ``text1``
        :returns: **True** -- ``text1`` and ``text2`` are the same in meaning,
                  **False** -- otherwise
        >>> from hanziconv import HanziConv
        >>> print(HanziConv.same('繁简转换器', '繁簡轉換器'))
        True
        """
        t1 = cls.toSimplified(text1)
        t2 = cls.toSimplified(text2)
        return t1 == t2
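
# Illustrative usage sketch: exercises the conversions documented in the
# doctests above when the module is run directly.
if __name__ == '__main__':
    print(HanziConv.toSimplified('繁簡轉換器'))        # 繁简转换器
    print(HanziConv.toTraditional('繁简转换器'))       # 繁簡轉換器
    print(HanziConv.same('繁简转换器', '繁簡轉換器'))  # True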