Use DeepL native glossary
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
|||||||
/vendor/
|
/vendor/
|
||||||
composer
|
composer
|
||||||
env.php
|
env.php
|
||||||
|
/glossary.json
|
||||||
@@ -9,3 +9,4 @@ Telegram bot that handles translating between Simplified Chinese and English usin
|
|||||||
1. `composer install`
|
1. `composer install`
|
||||||
1. `php set_web_hook.php`
|
1. `php set_web_hook.php`
|
||||||
1. It should work now
|
1. It should work now
|
||||||
|
1. (Optional) Run `php upload-glossary.php` to upload the translation glossaries to DeepL - feel free to edit that file to define your own glossary entries
|
||||||
|
|||||||
@@ -6,5 +6,10 @@
|
|||||||
"deeplcom/deepl-php": "^1.1.0",
|
"deeplcom/deepl-php": "^1.1.0",
|
||||||
"longman/telegram-bot": "*",
|
"longman/telegram-bot": "*",
|
||||||
"steelywing/chinese": "^0.5.3"
|
"steelywing/chinese": "^0.5.3"
|
||||||
|
},
|
||||||
|
"config": {
|
||||||
|
"allow-plugins": {
|
||||||
|
"php-http/discovery": true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
157
composer.lock
generated
157
composer.lock
generated
@@ -8,26 +8,34 @@
|
|||||||
"packages": [
|
"packages": [
|
||||||
{
|
{
|
||||||
"name": "deeplcom/deepl-php",
|
"name": "deeplcom/deepl-php",
|
||||||
"version": "v1.1.0",
|
"version": "v1.11.1",
|
||||||
"source": {
|
"source": {
|
||||||
"type": "git",
|
"type": "git",
|
||||||
"url": "https://github.com/DeepLcom/deepl-php.git",
|
"url": "https://github.com/DeepLcom/deepl-php.git",
|
||||||
"reference": "bb6ab74e3d90a543d7effd5ee2578d450a744c12"
|
"reference": "004cf73864e8547ece7e49a2b08ed7a2d6672d9c"
|
||||||
},
|
},
|
||||||
"dist": {
|
"dist": {
|
||||||
"type": "zip",
|
"type": "zip",
|
||||||
"url": "https://api.github.com/repos/DeepLcom/deepl-php/zipball/bb6ab74e3d90a543d7effd5ee2578d450a744c12",
|
"url": "https://api.github.com/repos/DeepLcom/deepl-php/zipball/004cf73864e8547ece7e49a2b08ed7a2d6672d9c",
|
||||||
"reference": "bb6ab74e3d90a543d7effd5ee2578d450a744c12",
|
"reference": "004cf73864e8547ece7e49a2b08ed7a2d6672d9c",
|
||||||
"shasum": ""
|
"shasum": ""
|
||||||
},
|
},
|
||||||
"require": {
|
"require": {
|
||||||
"ext-curl": "*",
|
"ext-curl": "*",
|
||||||
"ext-json": "*",
|
"ext-json": "*",
|
||||||
"ext-mbstring": "*",
|
"ext-mbstring": "*",
|
||||||
"php": ">=7.3.0"
|
"php": ">=7.3.0",
|
||||||
|
"php-http/discovery": "^1.18",
|
||||||
|
"php-http/multipart-stream-builder": "^1.3",
|
||||||
|
"psr/http-client": "^1.0",
|
||||||
|
"psr/http-client-implementation": "*",
|
||||||
|
"psr/http-factory-implementation": "*",
|
||||||
|
"psr/log": "^1.1 || ^2.0 || ^3.0"
|
||||||
},
|
},
|
||||||
"require-dev": {
|
"require-dev": {
|
||||||
"friendsofphp/php-cs-fixer": "^3",
|
"friendsofphp/php-cs-fixer": "^3",
|
||||||
|
"guzzlehttp/guzzle": "^7.7.0",
|
||||||
|
"php-mock/php-mock-phpunit": "^2.6",
|
||||||
"phpunit/phpunit": "^9",
|
"phpunit/phpunit": "^9",
|
||||||
"ramsey/uuid": "^4.2",
|
"ramsey/uuid": "^4.2",
|
||||||
"squizlabs/php_codesniffer": "^3.3"
|
"squizlabs/php_codesniffer": "^3.3"
|
||||||
@@ -57,9 +65,9 @@
|
|||||||
],
|
],
|
||||||
"support": {
|
"support": {
|
||||||
"issues": "https://github.com/DeepLcom/deepl-php/issues",
|
"issues": "https://github.com/DeepLcom/deepl-php/issues",
|
||||||
"source": "https://github.com/DeepLcom/deepl-php/tree/v1.1.0"
|
"source": "https://github.com/DeepLcom/deepl-php/tree/v1.11.1"
|
||||||
},
|
},
|
||||||
"time": "2022-09-28T08:44:58+00:00"
|
"time": "2025-01-17T09:36:14+00:00"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "guzzlehttp/guzzle",
|
"name": "guzzlehttp/guzzle",
|
||||||
@@ -493,6 +501,141 @@
|
|||||||
],
|
],
|
||||||
"time": "2022-09-04T13:22:41+00:00"
|
"time": "2022-09-04T13:22:41+00:00"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "php-http/discovery",
|
||||||
|
"version": "1.20.0",
|
||||||
|
"source": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "https://github.com/php-http/discovery.git",
|
||||||
|
"reference": "82fe4c73ef3363caed49ff8dd1539ba06044910d"
|
||||||
|
},
|
||||||
|
"dist": {
|
||||||
|
"type": "zip",
|
||||||
|
"url": "https://api.github.com/repos/php-http/discovery/zipball/82fe4c73ef3363caed49ff8dd1539ba06044910d",
|
||||||
|
"reference": "82fe4c73ef3363caed49ff8dd1539ba06044910d",
|
||||||
|
"shasum": ""
|
||||||
|
},
|
||||||
|
"require": {
|
||||||
|
"composer-plugin-api": "^1.0|^2.0",
|
||||||
|
"php": "^7.1 || ^8.0"
|
||||||
|
},
|
||||||
|
"conflict": {
|
||||||
|
"nyholm/psr7": "<1.0",
|
||||||
|
"zendframework/zend-diactoros": "*"
|
||||||
|
},
|
||||||
|
"provide": {
|
||||||
|
"php-http/async-client-implementation": "*",
|
||||||
|
"php-http/client-implementation": "*",
|
||||||
|
"psr/http-client-implementation": "*",
|
||||||
|
"psr/http-factory-implementation": "*",
|
||||||
|
"psr/http-message-implementation": "*"
|
||||||
|
},
|
||||||
|
"require-dev": {
|
||||||
|
"composer/composer": "^1.0.2|^2.0",
|
||||||
|
"graham-campbell/phpspec-skip-example-extension": "^5.0",
|
||||||
|
"php-http/httplug": "^1.0 || ^2.0",
|
||||||
|
"php-http/message-factory": "^1.0",
|
||||||
|
"phpspec/phpspec": "^5.1 || ^6.1 || ^7.3",
|
||||||
|
"sebastian/comparator": "^3.0.5 || ^4.0.8",
|
||||||
|
"symfony/phpunit-bridge": "^6.4.4 || ^7.0.1"
|
||||||
|
},
|
||||||
|
"type": "composer-plugin",
|
||||||
|
"extra": {
|
||||||
|
"class": "Http\\Discovery\\Composer\\Plugin",
|
||||||
|
"plugin-optional": true
|
||||||
|
},
|
||||||
|
"autoload": {
|
||||||
|
"psr-4": {
|
||||||
|
"Http\\Discovery\\": "src/"
|
||||||
|
},
|
||||||
|
"exclude-from-classmap": [
|
||||||
|
"src/Composer/Plugin.php"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"notification-url": "https://packagist.org/downloads/",
|
||||||
|
"license": [
|
||||||
|
"MIT"
|
||||||
|
],
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "Márk Sági-Kazár",
|
||||||
|
"email": "mark.sagikazar@gmail.com"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "Finds and installs PSR-7, PSR-17, PSR-18 and HTTPlug implementations",
|
||||||
|
"homepage": "http://php-http.org",
|
||||||
|
"keywords": [
|
||||||
|
"adapter",
|
||||||
|
"client",
|
||||||
|
"discovery",
|
||||||
|
"factory",
|
||||||
|
"http",
|
||||||
|
"message",
|
||||||
|
"psr17",
|
||||||
|
"psr7"
|
||||||
|
],
|
||||||
|
"support": {
|
||||||
|
"issues": "https://github.com/php-http/discovery/issues",
|
||||||
|
"source": "https://github.com/php-http/discovery/tree/1.20.0"
|
||||||
|
},
|
||||||
|
"time": "2024-10-02T11:20:13+00:00"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "php-http/multipart-stream-builder",
|
||||||
|
"version": "1.4.2",
|
||||||
|
"source": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "https://github.com/php-http/multipart-stream-builder.git",
|
||||||
|
"reference": "10086e6de6f53489cca5ecc45b6f468604d3460e"
|
||||||
|
},
|
||||||
|
"dist": {
|
||||||
|
"type": "zip",
|
||||||
|
"url": "https://api.github.com/repos/php-http/multipart-stream-builder/zipball/10086e6de6f53489cca5ecc45b6f468604d3460e",
|
||||||
|
"reference": "10086e6de6f53489cca5ecc45b6f468604d3460e",
|
||||||
|
"shasum": ""
|
||||||
|
},
|
||||||
|
"require": {
|
||||||
|
"php": "^7.1 || ^8.0",
|
||||||
|
"php-http/discovery": "^1.15",
|
||||||
|
"psr/http-factory-implementation": "^1.0"
|
||||||
|
},
|
||||||
|
"require-dev": {
|
||||||
|
"nyholm/psr7": "^1.0",
|
||||||
|
"php-http/message": "^1.5",
|
||||||
|
"php-http/message-factory": "^1.0.2",
|
||||||
|
"phpunit/phpunit": "^7.5.15 || ^8.5 || ^9.3"
|
||||||
|
},
|
||||||
|
"type": "library",
|
||||||
|
"autoload": {
|
||||||
|
"psr-4": {
|
||||||
|
"Http\\Message\\MultipartStream\\": "src/"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"notification-url": "https://packagist.org/downloads/",
|
||||||
|
"license": [
|
||||||
|
"MIT"
|
||||||
|
],
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "Tobias Nyholm",
|
||||||
|
"email": "tobias.nyholm@gmail.com"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"description": "A builder class that help you create a multipart stream",
|
||||||
|
"homepage": "http://php-http.org",
|
||||||
|
"keywords": [
|
||||||
|
"factory",
|
||||||
|
"http",
|
||||||
|
"message",
|
||||||
|
"multipart stream",
|
||||||
|
"stream"
|
||||||
|
],
|
||||||
|
"support": {
|
||||||
|
"issues": "https://github.com/php-http/multipart-stream-builder/issues",
|
||||||
|
"source": "https://github.com/php-http/multipart-stream-builder/tree/1.4.2"
|
||||||
|
},
|
||||||
|
"time": "2024-09-04T13:22:54+00:00"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "psr/http-client",
|
"name": "psr/http-client",
|
||||||
"version": "1.0.1",
|
"version": "1.0.1",
|
||||||
|
|||||||
@@ -19,36 +19,6 @@ class GenericmessageCommand extends SystemCommand
|
|||||||
protected $version = '1.0.0';
|
protected $version = '1.0.0';
|
||||||
private Translator $translator;
|
private Translator $translator;
|
||||||
|
|
||||||
private const ZH_TO_EN = [
|
|
||||||
'兽设' => 'fursona',
|
|
||||||
'兽人' => 'furry character',
|
|
||||||
'兽控' => 'furry',
|
|
||||||
'毛毛' => 'fursuit',
|
|
||||||
'兽装' => 'fursuit',
|
|
||||||
'大佬' => 'da lao',
|
|
||||||
'丸吞' => 'vore',
|
|
||||||
'哦不' => 'oh no',
|
|
||||||
'哦哇塞' => 'o wa sai',
|
|
||||||
'哇塞' => 'wa sai',
|
|
||||||
'色色' => 'hentai',
|
|
||||||
];
|
|
||||||
private const EN_TO_ZH = [
|
|
||||||
'fursona' => '兽设',
|
|
||||||
'furry character' => '兽人',
|
|
||||||
'furry' => '兽控',
|
|
||||||
'fursuit' => '兽装',
|
|
||||||
'hentai' => '色色',
|
|
||||||
'sese' => '色色',
|
|
||||||
'sheshe' => '射射',
|
|
||||||
'wun tun' => '丸吞',
|
|
||||||
'vore' => '丸吞',
|
|
||||||
'money power' => '钞能力',
|
|
||||||
'oh no' => '哦不',
|
|
||||||
'touch fish' => '摸鱼',
|
|
||||||
'o wa sai' => '哦哇塞',
|
|
||||||
'wa sai' => '哇塞'
|
|
||||||
];
|
|
||||||
|
|
||||||
public function execute(): ServerResponse
|
public function execute(): ServerResponse
|
||||||
{
|
{
|
||||||
global $deepl_api_key, $allowed_chat_ids;
|
global $deepl_api_key, $allowed_chat_ids;
|
||||||
@@ -70,21 +40,25 @@ class GenericmessageCommand extends SystemCommand
|
|||||||
if ($text_english_count >= ($text_chara_count*2 /3)) {
|
if ($text_english_count >= ($text_chara_count*2 /3)) {
|
||||||
$sourceLang = 'en';
|
$sourceLang = 'en';
|
||||||
$targetLang = 'zh';
|
$targetLang = 'zh';
|
||||||
$glossary = self::EN_TO_ZH;
|
|
||||||
} else {
|
} else {
|
||||||
$sourceLang = 'zh';
|
$sourceLang = 'zh';
|
||||||
$targetLang = 'en-US';
|
$targetLang = 'en-US';
|
||||||
$glossary = self::ZH_TO_EN;
|
|
||||||
$text = (new Chinese())->to(Chinese::ZH_HANS, $text);
|
$text = (new Chinese())->to(Chinese::ZH_HANS, $text);
|
||||||
}
|
}
|
||||||
|
$deeplOptions = [
|
||||||
foreach ($glossary as $search => $replace) {
|
'formality' => 'prefer_less',
|
||||||
$text = str_ireplace($search, $replace, $text);
|
'model_type' => 'prefer_quality_optimized',
|
||||||
|
];
|
||||||
|
try {
|
||||||
|
$glosasaryConfig = json_decode(file_get_contents(__DIR__ . '/../../glossary.json'), true);
|
||||||
|
if (isset($glosasaryConfig[$sourceLang][$targetLang])) {
|
||||||
|
$deeplOptions['glossary'] = $glosasaryConfig[$sourceLang][$targetLang];
|
||||||
}
|
}
|
||||||
|
} catch(\Exception $e) {}
|
||||||
|
|
||||||
$this->translator = new Translator($deepl_api_key);
|
$this->translator = new Translator($deepl_api_key);
|
||||||
try {
|
try {
|
||||||
$translated = $this->translator->translateText($text, $sourceLang, $targetLang);
|
$translated = $this->translator->translateText($text, $sourceLang, $targetLang, $deeplOptions);
|
||||||
} catch (\Exception $e) {
|
} catch (\Exception $e) {
|
||||||
return $this->replyToChat(get_class($e) . ': ' . $e->getMessage()); // ?? var_export($e, true)
|
return $this->replyToChat(get_class($e) . ': ' . $e->getMessage()); // ?? var_export($e, true)
|
||||||
}
|
}
|
||||||
|
|||||||
59
upload-glossary.php
Normal file
59
upload-glossary.php
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
<?php
/**
 * Uploads the ZH->EN and EN->ZH glossaries to DeepL and records their ids.
 *
 * Steps performed, in order:
 *   1. Create both glossaries through the DeepL API.
 *   2. Write their ids to glossary.json (next to this script) as a
 *      source-language => target-language => glossary-id map.
 *   3. Delete every other glossary found on the DeepL account.
 *
 * env.php is expected to define $deepl_api_key.
 *
 * WARNING: step 3 removes ALL glossaries on the account other than the two
 * created by this run.
 */
require __DIR__ . '/vendor/autoload.php';
require __DIR__ . '/env.php';

$deeplClient = new \DeepL\DeepLClient($deepl_api_key);

// Chinese -> English terms. Source terms are written in Simplified Chinese
// because the bot converts incoming Chinese text to ZH_HANS before translating.
$zhToEnGlossary = $deeplClient->createGlossary('Furry ZH to EN', 'zh', 'en', \DeepL\GlossaryEntries::fromEntries([
    '兽设' => 'fursona',
    '兽人' => 'furry character',
    '兽控' => 'furry',
    '毛毛' => 'fursuit',
    '兽装' => 'fursuit',
    '兽聚' => 'furcon',
    '大佬' => 'da lao',
    '丸吞' => 'vore',
    '哦不' => 'oh no',
    '哦哇塞' => 'o wa sai',
    '哇塞' => 'wa sai',
    '色色' => 'sese',
    // Simplified Chinese 变 (not the Japanese form 変), so the entry can match ZH_HANS input.
    '变态' => 'hentai',
]));
echo "Created '$zhToEnGlossary->name' ($zhToEnGlossary->glossaryId) " .
    "$zhToEnGlossary->sourceLang to $zhToEnGlossary->targetLang " .
    "containing $zhToEnGlossary->entryCount entries\n";

// English -> Chinese terms.
$enToZhGlossary = $deeplClient->createGlossary('Furry EN to ZH', 'en', 'zh', \DeepL\GlossaryEntries::fromEntries([
    'fursona' => '兽设',
    'furry character' => '兽人',
    'furry' => '兽控',
    'fursuit' => '兽装',
    'furcon' => '兽聚',
    // Simplified Chinese 变 (not the Japanese form 変).
    'hentai' => '变态',
    'sese' => '色色',
    'sheshe' => '射射',
    'wun tun' => '丸吞',
    'vore' => '丸吞',
    'money power' => '钞能力',
    'oh no' => '哦不',
    'touch fish' => '摸鱼',
    'o wa sai' => '哦哇塞',
    'wa sai' => '哇塞',
    'commission' => '委托',
    'dalao' => '大佬',
]));
echo "Created '$enToZhGlossary->name' ($enToZhGlossary->glossaryId) " .
    "$enToZhGlossary->sourceLang to $enToZhGlossary->targetLang " .
    "containing $enToZhGlossary->entryCount entries\n";

// The message handler looks the glossary up with the exact target code it
// sends to DeepL, which is 'en-US' for Chinese input. Register the ZH->EN
// glossary under both 'en' and 'en-US' so that lookup succeeds; the plain
// 'en' key is kept for backward compatibility.
$glossaryMap = [
    'zh' => [
        'en' => $zhToEnGlossary->glossaryId,
        'en-US' => $zhToEnGlossary->glossaryId,
    ],
    'en' => ['zh' => $enToZhGlossary->glossaryId],
];

// Stop before the cleanup if the map cannot be saved: otherwise the glossaries
// referenced by an existing glossary.json would be deleted while the file
// still points at them.
if (file_put_contents(__DIR__ . '/glossary.json', json_encode($glossaryMap, JSON_THROW_ON_ERROR)) === false) {
    fwrite(STDERR, "Failed to write glossary.json, skipping cleanup of old glossaries\n");
    exit(1);
}

echo("\nCleaning up other unused glossaries...\n");
$activeGlossaryIds = [$enToZhGlossary->glossaryId, $zhToEnGlossary->glossaryId];
foreach ($deeplClient->listGlossaries() as $glossary) {
    // Strict comparison: glossary ids are strings and must match exactly.
    if (!in_array($glossary->glossaryId, $activeGlossaryIds, true)) {
        echo("Unused glossary found, deleting '$glossary->name' ($glossary->glossaryId)\n");
        $deeplClient->deleteGlossary($glossary);
    }
}
|
||||||
Reference in New Issue
Block a user