From bd79a933ddcc99ef1f769ed8d832f83ce2859e2c Mon Sep 17 00:00:00 2001 From: WimPum Date: Thu, 11 Dec 2025 06:03:29 +0900 Subject: [PATCH 1/2] Add mojibake extension None check Reorder options to set correct default --- icons/org.inkscape.text.mojibake.svg | 46 ++++++++++++ mojibake.inx | 29 ++++++++ mojibake.py | 40 ++++++++++ ...bake__--source__utf_8__--target__cp932.out | 0 tests/test_mojibake.py | 74 +++++++++++++++++++ 5 files changed, 189 insertions(+) create mode 100644 icons/org.inkscape.text.mojibake.svg create mode 100644 mojibake.inx create mode 100644 mojibake.py create mode 100644 tests/data/refs/mojibake__--source__utf_8__--target__cp932.out create mode 100644 tests/test_mojibake.py diff --git a/icons/org.inkscape.text.mojibake.svg b/icons/org.inkscape.text.mojibake.svg new file mode 100644 index 00000000..f04d407a --- /dev/null +++ b/icons/org.inkscape.text.mojibake.svg @@ -0,0 +1,46 @@ + + + + + + + + + + + diff --git a/mojibake.inx b/mojibake.inx new file mode 100644 index 00000000..caef1de7 --- /dev/null +++ b/mojibake.inx @@ -0,0 +1,29 @@ + + + + Mojibake + org.inkscape.text.mojibake + + + + + + + + + + + + + + + + all + + + + + + \ No newline at end of file diff --git a/mojibake.py b/mojibake.py new file mode 100644 index 00000000..964d3bd9 --- /dev/null +++ b/mojibake.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +"""mojibake: very basic character reinterpreter""" + +# Copyright (c) 2025 WimPum +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

import re

import inkex


class Mojibake(inkex.TextExtension):
    """Text extension that deliberately produces mojibake.

    Each piece of character data is encoded to bytes with the ``--source``
    codec and those bytes are then decoded with the ``--target`` codec.
    """

    # Code points outside the XML 1.0 ``Char`` production: C0 controls other
    # than tab/LF/CR, surrogates, U+FFFE and U+FFFF. A mismatched codec pair
    # can yield them (for example NUL when the source is ``utf_16``).
    _XML_ILLEGAL = re.compile(
        r"[\x00-\x08\x0b\x0c\x0e-\x1f\ud800-\udfff\ufffe\uffff]"
    )

    def add_arguments(self, pars):
        """Register the ``--source`` and ``--target`` codec-name options.

        Both are plain strings; the defaults are ``utf_8`` and ``cp932``.
        """
        pars.add_argument("--source", type=str, default="utf_8")
        pars.add_argument("--target", type=str, default="cp932")

    def process_chardata(self, text):
        """Return ``text`` encoded with ``source`` and decoded with ``target``.

        Characters the source codec cannot encode, and byte sequences the
        target codec cannot decode, are substituted (``errors="replace"``)
        rather than raising. Code points that are not legal XML 1.0
        characters are replaced with U+FFFD in the result.

        If either option is empty or ``None`` the text is returned unchanged.

        Raises:
            inkex.AbortExtension: if either codec name is unknown to Python
                or does not name a text encoding.
        """
        source = self.options.source
        target = self.options.target
        if not source or not target:
            # Without both codec names there is nothing to reinterpret.
            return text

        try:
            garbled = text.encode(source, "replace").decode(target, "replace")
        except LookupError as err:
            # Unknown / non-text codec: report it instead of a bare traceback.
            raise inkex.AbortExtension(
                f"Unsupported text encoding: {err}"
            ) from err

        return self._XML_ILLEGAL.sub("\ufffd", garbled)


if __name__ == "__main__":
    Mojibake().run()
32: + if ch not in ("\t", "\n", "\r"): + return False + return True + + def test_utf8_identity(self): + """Ensure that string stays the same when the same encodings are chosen.""" + self.effect.options.source = "utf_8" + self.effect.options.target = "utf_8" + text = self.random_string() + self.assertEqual(self.effect.process_chardata(text), text) + + def test_utf8_to_other_changes(self): + """Ensure that reinterpreting changes string.""" + for target in self.encodings: + if target == "utf_8": + continue + with self.subTest(target=target): + self.effect.options.source = "utf_8" + self.effect.options.target = target + text = self.random_string() + result = self.effect.process_chardata(text) + + self.assertTrue(self.is_xml_safe(result)) + self.assertNotEqual(result, text) + + def test_all_pairs_xml_safe(self): + """Ensure that all conversion will not generate escaped letters""" + text = self.random_string() + + for source in self.encodings: + for target in self.encodings: + with self.subTest(source=source, target=target): + self.effect.options.source = source + self.effect.options.target = target + result = self.effect.process_chardata(text) + self.assertTrue(self.is_xml_safe(result)) -- GitLab From 9e54fbed63908dc47e7de02eefa9492c52dd7d52 Mon Sep 17 00:00:00 2001 From: WimPum Date: Fri, 12 Dec 2025 13:27:45 +0900 Subject: [PATCH 2/2] Add Japanese text test --- ...bake__--source__utf_8__--target__cp932.out | 20 +++++ tests/data/svg/text_japanese.svg | 74 +++++++++++++++++++ tests/test_mojibake.py | 1 + 3 files changed, 95 insertions(+) create mode 100644 tests/data/svg/text_japanese.svg diff --git a/tests/data/refs/mojibake__--source__utf_8__--target__cp932.out b/tests/data/refs/mojibake__--source__utf_8__--target__cp932.out index e69de29b..d77608d8 100644 --- a/tests/data/refs/mojibake__--source__utf_8__--target__cp932.out +++ b/tests/data/refs/mojibake__--source__utf_8__--target__cp932.out @@ -0,0 +1,20 @@ + + + + + + 縺ゅ�ョ繧、繝シ繝上ヨ繝シ繝エ繧ゥ縺ョ縺吶″縺ィ縺翫▲縺滄「ィ縲� 
+螟上〒繧ょコ輔↓蜀キ縺溘&繧偵b縺、髱�縺�縺昴i縲� +縺�縺、縺上@縺�譽ョ縺ァ鬟セ繧峨l縺溘Δ繝ェ繝シ繧ェ蟶ゅ€� +驛雁、�縺ョ縺弱i縺弱i縺イ縺九k闕�縺ョ豕「縲� + + \ No newline at end of file diff --git a/tests/data/svg/text_japanese.svg b/tests/data/svg/text_japanese.svg new file mode 100644 index 00000000..96f5af1f --- /dev/null +++ b/tests/data/svg/text_japanese.svg @@ -0,0 +1,74 @@ + + + + + + + + あのイーハトーヴォのすきとおった風、 +夏でも底に冷たさをもついそら、 +うつくしい森で飾られたモリーオ市、 +郊外のぎらぎらひかる波。 + + diff --git a/tests/test_mojibake.py b/tests/test_mojibake.py index af4a6a6f..69474881 100644 --- a/tests/test_mojibake.py +++ b/tests/test_mojibake.py @@ -10,6 +10,7 @@ from mojibake import Mojibake class TestMojibake(ComparisonMixin, TestCase): effect_class = Mojibake comparisons = [("--source=utf_8", "--target=cp932")] + compare_file = "svg/text_japanese.svg" encodings = [ "utf_8", -- GitLab