Use DeepL native glossary

This commit is contained in:
2025-01-19 23:37:58 +00:00
parent e262119e54
commit eb8653696e
6 changed files with 227 additions and 44 deletions

1
.gitignore vendored
View File

@@ -1,3 +1,4 @@
/vendor/
composer
env.php
/glossary.json

View File

@@ -9,3 +9,4 @@ Telegram bot that handle translating between Simplified Chinese and English usin
1. `composer install`
1. `php set_web_hook.php`
1. It should work now
1. (Optional) Run `php upload-glossary.php` to upload the glossary to DeepL and generate `glossary.json` - feel free to edit the file to define your own glossary

View File

@@ -6,5 +6,10 @@
"deeplcom/deepl-php": "^1.1.0",
"longman/telegram-bot": "*",
"steelywing/chinese": "^0.5.3"
},
"config": {
"allow-plugins": {
"php-http/discovery": true
}
}
}

157
composer.lock generated
View File

@@ -8,26 +8,34 @@
"packages": [
{
"name": "deeplcom/deepl-php",
"version": "v1.1.0",
"version": "v1.11.1",
"source": {
"type": "git",
"url": "https://github.com/DeepLcom/deepl-php.git",
"reference": "bb6ab74e3d90a543d7effd5ee2578d450a744c12"
"reference": "004cf73864e8547ece7e49a2b08ed7a2d6672d9c"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/DeepLcom/deepl-php/zipball/bb6ab74e3d90a543d7effd5ee2578d450a744c12",
"reference": "bb6ab74e3d90a543d7effd5ee2578d450a744c12",
"url": "https://api.github.com/repos/DeepLcom/deepl-php/zipball/004cf73864e8547ece7e49a2b08ed7a2d6672d9c",
"reference": "004cf73864e8547ece7e49a2b08ed7a2d6672d9c",
"shasum": ""
},
"require": {
"ext-curl": "*",
"ext-json": "*",
"ext-mbstring": "*",
"php": ">=7.3.0"
"php": ">=7.3.0",
"php-http/discovery": "^1.18",
"php-http/multipart-stream-builder": "^1.3",
"psr/http-client": "^1.0",
"psr/http-client-implementation": "*",
"psr/http-factory-implementation": "*",
"psr/log": "^1.1 || ^2.0 || ^3.0"
},
"require-dev": {
"friendsofphp/php-cs-fixer": "^3",
"guzzlehttp/guzzle": "^7.7.0",
"php-mock/php-mock-phpunit": "^2.6",
"phpunit/phpunit": "^9",
"ramsey/uuid": "^4.2",
"squizlabs/php_codesniffer": "^3.3"
@@ -57,9 +65,9 @@
],
"support": {
"issues": "https://github.com/DeepLcom/deepl-php/issues",
"source": "https://github.com/DeepLcom/deepl-php/tree/v1.1.0"
"source": "https://github.com/DeepLcom/deepl-php/tree/v1.11.1"
},
"time": "2022-09-28T08:44:58+00:00"
"time": "2025-01-17T09:36:14+00:00"
},
{
"name": "guzzlehttp/guzzle",
@@ -493,6 +501,141 @@
],
"time": "2022-09-04T13:22:41+00:00"
},
{
"name": "php-http/discovery",
"version": "1.20.0",
"source": {
"type": "git",
"url": "https://github.com/php-http/discovery.git",
"reference": "82fe4c73ef3363caed49ff8dd1539ba06044910d"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-http/discovery/zipball/82fe4c73ef3363caed49ff8dd1539ba06044910d",
"reference": "82fe4c73ef3363caed49ff8dd1539ba06044910d",
"shasum": ""
},
"require": {
"composer-plugin-api": "^1.0|^2.0",
"php": "^7.1 || ^8.0"
},
"conflict": {
"nyholm/psr7": "<1.0",
"zendframework/zend-diactoros": "*"
},
"provide": {
"php-http/async-client-implementation": "*",
"php-http/client-implementation": "*",
"psr/http-client-implementation": "*",
"psr/http-factory-implementation": "*",
"psr/http-message-implementation": "*"
},
"require-dev": {
"composer/composer": "^1.0.2|^2.0",
"graham-campbell/phpspec-skip-example-extension": "^5.0",
"php-http/httplug": "^1.0 || ^2.0",
"php-http/message-factory": "^1.0",
"phpspec/phpspec": "^5.1 || ^6.1 || ^7.3",
"sebastian/comparator": "^3.0.5 || ^4.0.8",
"symfony/phpunit-bridge": "^6.4.4 || ^7.0.1"
},
"type": "composer-plugin",
"extra": {
"class": "Http\\Discovery\\Composer\\Plugin",
"plugin-optional": true
},
"autoload": {
"psr-4": {
"Http\\Discovery\\": "src/"
},
"exclude-from-classmap": [
"src/Composer/Plugin.php"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Márk Sági-Kazár",
"email": "mark.sagikazar@gmail.com"
}
],
"description": "Finds and installs PSR-7, PSR-17, PSR-18 and HTTPlug implementations",
"homepage": "http://php-http.org",
"keywords": [
"adapter",
"client",
"discovery",
"factory",
"http",
"message",
"psr17",
"psr7"
],
"support": {
"issues": "https://github.com/php-http/discovery/issues",
"source": "https://github.com/php-http/discovery/tree/1.20.0"
},
"time": "2024-10-02T11:20:13+00:00"
},
{
"name": "php-http/multipart-stream-builder",
"version": "1.4.2",
"source": {
"type": "git",
"url": "https://github.com/php-http/multipart-stream-builder.git",
"reference": "10086e6de6f53489cca5ecc45b6f468604d3460e"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/php-http/multipart-stream-builder/zipball/10086e6de6f53489cca5ecc45b6f468604d3460e",
"reference": "10086e6de6f53489cca5ecc45b6f468604d3460e",
"shasum": ""
},
"require": {
"php": "^7.1 || ^8.0",
"php-http/discovery": "^1.15",
"psr/http-factory-implementation": "^1.0"
},
"require-dev": {
"nyholm/psr7": "^1.0",
"php-http/message": "^1.5",
"php-http/message-factory": "^1.0.2",
"phpunit/phpunit": "^7.5.15 || ^8.5 || ^9.3"
},
"type": "library",
"autoload": {
"psr-4": {
"Http\\Message\\MultipartStream\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Tobias Nyholm",
"email": "tobias.nyholm@gmail.com"
}
],
"description": "A builder class that help you create a multipart stream",
"homepage": "http://php-http.org",
"keywords": [
"factory",
"http",
"message",
"multipart stream",
"stream"
],
"support": {
"issues": "https://github.com/php-http/multipart-stream-builder/issues",
"source": "https://github.com/php-http/multipart-stream-builder/tree/1.4.2"
},
"time": "2024-09-04T13:22:54+00:00"
},
{
"name": "psr/http-client",
"version": "1.0.1",

View File

@@ -19,36 +19,6 @@ class GenericmessageCommand extends SystemCommand
protected $version = '1.0.0';
private Translator $translator;
private const ZH_TO_EN = [
'兽设' => 'fursona',
'兽人' => 'furry character',
'兽控' => 'furry',
'毛毛' => 'fursuit',
'兽装' => 'fursuit',
'大佬' => 'da lao',
'丸吞' => 'vore',
'哦不' => 'oh no',
'哦哇塞' => 'o wa sai',
'哇塞' => 'wa sai',
'色色' => 'hentai',
];
private const EN_TO_ZH = [
'fursona' => '兽设',
'furry character' => '兽人',
'furry' => '兽控',
'fursuit' => '兽装',
'hentai' => '色色',
'sese' => '色色',
'sheshe' => '射射',
'wun tun' => '丸吞',
'vore' => '丸吞',
'money power' => '钞能力',
'oh no' => '哦不',
'touch fish' => '摸鱼',
'o wa sai' => '哦哇塞',
'wa sai' => '哇塞'
];
public function execute(): ServerResponse
{
global $deepl_api_key, $allowed_chat_ids;
@@ -70,21 +40,25 @@ class GenericmessageCommand extends SystemCommand
if ($text_english_count >= ($text_chara_count*2 /3)) {
$sourceLang = 'en';
$targetLang = 'zh';
$glossary = self::EN_TO_ZH;
} else {
$sourceLang = 'zh';
$targetLang = 'en-US';
$glossary = self::ZH_TO_EN;
$text = (new Chinese())->to(Chinese::ZH_HANS, $text);
}
foreach ($glossary as $search => $replace) {
$text = str_ireplace($search, $replace, $text);
}
$deeplOptions = [
'formality' => 'prefer_less',
'model_type' => 'prefer_quality_optimized',
];
try {
$glosasaryConfig = json_decode(file_get_contents(__DIR__ . '/../../glossary.json'), true);
if (isset($glosasaryConfig[$sourceLang][$targetLang])) {
$deeplOptions['glossary'] = $glosasaryConfig[$sourceLang][$targetLang];
}
} catch(\Exception $e) {}
$this->translator = new Translator($deepl_api_key);
try {
$translated = $this->translator->translateText($text, $sourceLang, $targetLang);
$translated = $this->translator->translateText($text, $sourceLang, $targetLang, $deeplOptions);
} catch (\Exception $e) {
return $this->replyToChat(get_class($e) . ': ' . $e->getMessage()); // ?? var_export($e, true)
}

59
upload-glossary.php Normal file
View File

@@ -0,0 +1,59 @@
<?php

declare(strict_types=1);

/*
 * Uploads the bot's glossaries to DeepL and records their ids in glossary.json.
 *
 * Steps:
 *   1. Create a ZH -> EN and an EN -> ZH glossary through the DeepL client.
 *   2. Write the new glossary ids to glossary.json next to this script.
 *   3. Delete every other glossary stored on the DeepL account.
 *
 * Step 3 removes ALL glossaries on the account except the two created in this
 * run, so do not share the API key with other glossary users.
 *
 * Requires env.php to define $deepl_api_key.
 */

require __DIR__ . '/vendor/autoload.php';
require __DIR__ . '/env.php';

$deeplClient = new \DeepL\DeepLClient($deepl_api_key);

// Source terms are written in Simplified Chinese. Note that "变态" uses the
// Simplified character 变 (U+53D8), not the Japanese 変 (U+5909).
$zhToEnGlossary = $deeplClient->createGlossary(
    'Furry ZH to EN',
    'zh',
    'en',
    \DeepL\GlossaryEntries::fromEntries([
        '兽设' => 'fursona',
        '兽人' => 'furry character',
        '兽控' => 'furry',
        '毛毛' => 'fursuit',
        '兽装' => 'fursuit',
        '兽聚' => 'furcon',
        '大佬' => 'da lao',
        '丸吞' => 'vore',
        '哦不' => 'oh no',
        '哦哇塞' => 'o wa sai',
        '哇塞' => 'wa sai',
        '色色' => 'sese',
        '变态' => 'hentai',
    ])
);
echo "Created '{$zhToEnGlossary->name}' ({$zhToEnGlossary->glossaryId}) " .
    "{$zhToEnGlossary->sourceLang} to {$zhToEnGlossary->targetLang} " .
    "containing {$zhToEnGlossary->entryCount} entries\n";

$enToZhGlossary = $deeplClient->createGlossary(
    'Furry EN to ZH',
    'en',
    'zh',
    \DeepL\GlossaryEntries::fromEntries([
        'fursona' => '兽设',
        'furry character' => '兽人',
        'furry' => '兽控',
        'fursuit' => '兽装',
        'furcon' => '兽聚',
        'hentai' => '变态',
        'sese' => '色色',
        'sheshe' => '射射',
        'wun tun' => '丸吞',
        'vore' => '丸吞',
        'money power' => '钞能力',
        'oh no' => '哦不',
        'touch fish' => '摸鱼',
        'o wa sai' => '哦哇塞',
        'wa sai' => '哇塞',
        'commission' => '委托',
        'dalao' => '大佬',
    ])
);
echo "Created '{$enToZhGlossary->name}' ({$enToZhGlossary->glossaryId}) " .
    "{$enToZhGlossary->sourceLang} to {$enToZhGlossary->targetLang} " .
    "containing {$enToZhGlossary->entryCount} entries\n";

// The mapping is keyed as [source language][target language] => glossary id.
// The ZH -> EN id is stored under both 'en' and 'en-US' so that a lookup using
// the regional target code 'en-US' also finds it.
$glossaryMap = [
    'zh' => [
        'en' => $zhToEnGlossary->glossaryId,
        'en-US' => $zhToEnGlossary->glossaryId,
    ],
    'en' => ['zh' => $enToZhGlossary->glossaryId],
];

$written = file_put_contents(
    __DIR__ . '/glossary.json',
    json_encode($glossaryMap, JSON_THROW_ON_ERROR)
);
if ($written === false) {
    // Stop before the cleanup below: an existing glossary.json may still point
    // at older glossaries, and deleting them would leave it with dead ids.
    fwrite(STDERR, "Failed to write glossary.json; skipping cleanup of old glossaries\n");
    exit(1);
}

echo("\nCleaning up other unused glossaries...\n");
$keptGlossaryIds = [$enToZhGlossary->glossaryId, $zhToEnGlossary->glossaryId];
foreach ($deeplClient->listGlossaries() as $glossary) {
    if (!in_array($glossary->glossaryId, $keptGlossaryIds, true)) {
        echo("Unused glossary found, deleting '{$glossary->name}' ({$glossary->glossaryId})\n");
        $deeplClient->deleteGlossary($glossary);
    }
}