misc python code
This commit is contained in:
309
code/misc/python/scripts/translate.ipynb
Normal file
309
code/misc/python/scripts/translate.ipynb
Normal file
@@ -0,0 +1,309 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'روباه قهوه ای سریع از روی سگ تنبل پرش می کند.'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import translators as ts\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"phrase = 'The quick brown fox jumps over the lazy dog.'\n",
|
||||
"ts.server.google(phrase, from_language='en', to_language='fa')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pathlib import Path\n",
|
||||
"fname=Path('/media/dl92/mydata/projects/code/Data')/'faq.pdf'\n",
|
||||
"#fname=Path('/media/dl92/mydata/projects/code/Data')/'Chapter2.pdf'\n",
|
||||
"\n",
|
||||
"import PyPDF2 \n",
|
||||
"\n",
|
||||
"# Close the PDF handle as soon as the first page's text has been extracted\n",
|
||||
"with open(fname, 'rb') as pdfFileObj:\n",
|
||||
"    reader=PyPDF2.PdfReader(pdfFileObj)\n",
|
||||
"    phrase=reader.pages[0].extract_text()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 59,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Frequently asked questions \n",
|
||||
" \n",
|
||||
" Public questions \n",
|
||||
" 1. How can I make my problems without being present? \n",
|
||||
" • In the guide section of the link system called the Relationship with the Agent, the applicant after logging in \n",
|
||||
" The system can receive its problem with the dealership and the duties and through the link \n",
|
||||
" The follow -up by entering the tracking code, see the response from the dealership. \n",
|
||||
" \n",
|
||||
" 2. I have forgotten your fixed password. \n",
|
||||
" • If you have forgotten your fixed passwords you can through this link “your password \n",
|
||||
" I forgot ! “Receive your temporary password by email. \n",
|
||||
" \n",
|
||||
" 3. I don't get a disposable password: \n",
|
||||
" • Preferably use emails other than Yahoo and Gmail.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(ts.server.google(phrase, from_language='fa', to_language='en'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_31923/3918874040.py:10: DeprecationWarning: The 'text' argument to find()-type methods is deprecated. Use 'string' instead.\n",
|
||||
" texts = str(soup.findAll(text=True)).replace('\\\\n','')\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"from bs4 import BeautifulSoup\n",
|
||||
"\n",
|
||||
"URL='https://mikhak.mfa.gov.ir/form/landing.xhtml'\n",
|
||||
"page = requests.get(URL)\n",
|
||||
"page.content\n",
|
||||
"\n",
|
||||
"soup = BeautifulSoup(page.content, \"xml\")\n",
|
||||
"soup\n",
|
||||
"texts = str(soup.find_all(string=True)).replace('\\\\n','')\n",
|
||||
"#results=soup.find(id=\"introduction\")\n",
|
||||
"#print(results.prettify())\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"\"['html', 'Error 403', ' |', 'Forbidden', 'Request blocked by websites firewall due to security reasons!', 'The requested content or URL is incorrect or contains invalid characters.', 'Please review your request and page URL or contact ', 'mikhak.mfa.gov.ir', ' support.', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'Time: 2023-04-21 22:32:42 UTC', '|', 'Error Code: 403', '|', 'Server Code: 2582', '|', 'Domain: mikhak.mfa.gov.ir', '|', 'Your IP: 82.36.108.222', 'خطای ۴۰۳', ' |', 'Forbidden', 'دیواره\\\\u200cی آتش وب\\\\u200cسایت، درخواست شما را به دلایل امنیتی مسدود کرده است.', 'درخواست HTTP خود را بازبینی کنید:', 'محتوای درخواست یا نشانی اینترنتی وارد شده نادرست است، یا کاراکترهای غیرمجاز در آن به کار رفته است.', 'برای اطلاع بیش\\\\u200cتر با پشتیبانی ', 'mikhak.mfa.gov.ir', ' تماس بگیرید.', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'Time: 2023-04-21 22:32:42 UTC', '|', 'Error Code: 403', '|', 'Server Code: 2582', '|', 'Domain: mikhak.mfa.gov.ir', '|', 'Your IP: 82.36.108.222']\""
|
||||
]
|
||||
},
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"texts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ValueError",
|
||||
"evalue": "Language not supported: fa",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[1], line 18\u001b[0m\n\u001b[1;32m 12\u001b[0m language \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfa\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# Language in which you want to convert\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m#language = 'en'\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \n\u001b[1;32m 17\u001b[0m \u001b[38;5;66;03m# Creating an instance of gTTS\u001b[39;00m\n\u001b[0;32m---> 18\u001b[0m speech \u001b[38;5;241m=\u001b[39m \u001b[43mgTTS\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlang\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlanguage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mslow\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;66;03m# Saving the converted audio in an mp3 file\u001b[39;00m\n\u001b[1;32m 21\u001b[0m speech\u001b[38;5;241m.\u001b[39msave(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moutput.mp3\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
||||
"File \u001b[0;32m~/Data/projects/code/Python/Envs/General/lib/python3.11/site-packages/gtts/tts.py:150\u001b[0m, in \u001b[0;36mgTTS.__init__\u001b[0;34m(self, text, tld, lang, slow, lang_check, pre_processor_funcs, tokenizer_func, timeout)\u001b[0m\n\u001b[1;32m 148\u001b[0m langs \u001b[38;5;241m=\u001b[39m tts_langs()\n\u001b[1;32m 149\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlang \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m langs:\n\u001b[0;32m--> 150\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLanguage not supported: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m lang)\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 152\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;28mstr\u001b[39m(e), exc_info\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
|
||||
"\u001b[0;31mValueError\u001b[0m: Language not supported: fa"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from gtts import gTTS\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# Text to be converted to speech\n",
|
||||
"text = \"hey big boy? you want some fucky fucky or sucky sucky?\"\n",
|
||||
"#text = \"thats quite alright bitch!\"\n",
|
||||
"\n",
|
||||
"# Farsi text to be converted to speech\n",
|
||||
"#text = \"سلام، حال شما چطور است؟\"\n",
|
||||
"\n",
|
||||
"# Language code: gTTS has no Farsi ('fa') voice, and the active text above is English\n",
|
||||
"language = 'en'\n",
|
||||
"\n",
|
||||
"# Language in which you want to convert\n",
|
||||
"#language = 'en'\n",
|
||||
"\n",
|
||||
"# Creating an instance of gTTS\n",
|
||||
"speech = gTTS(text=text, lang=language, slow=False)\n",
|
||||
"\n",
|
||||
"# Saving the converted audio in an mp3 file\n",
|
||||
"speech.save(\"output.mp3\")\n",
|
||||
"\n",
|
||||
"# Playing the converted file (optional)\n",
|
||||
"os.system(\"mpg output.mp3\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pyttsx3\n",
|
||||
"\n",
|
||||
"# Initialize the TTS engine\n",
|
||||
"engine = pyttsx3.init()\n",
|
||||
"\n",
|
||||
"# Farsi text to be converted to speech\n",
|
||||
"text = \"سلام، حال شما چطور است؟\"\n",
|
||||
"\n",
|
||||
"# Set properties (optional)\n",
|
||||
"engine.setProperty('rate', 150) # Speed of speech\n",
|
||||
"engine.setProperty('volume', 1) # Volume (0.0 to 1.0)\n",
|
||||
"\n",
|
||||
"# Convert text to speech\n",
|
||||
"engine.say(text)\n",
|
||||
"\n",
|
||||
"# Save the speech to a file\n",
|
||||
"engine.save_to_file(text, 'output_farsi.mp3')\n",
|
||||
"\n",
|
||||
"# Process the queued say/save commands; nothing is spoken or written until this runs\n",
|
||||
"engine.runAndWait()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import gtts \n",
|
||||
"import pandas as pd\n",
|
||||
"pd.Series(gtts.lang.tts_langs()).to_clipboard()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\t0\n",
|
||||
"af\tAfrikaans\n",
|
||||
"am\tAmharic\n",
|
||||
"ar\tArabic\n",
|
||||
"bg\tBulgarian\n",
|
||||
"bn\tBengali\n",
|
||||
"bs\tBosnian\n",
|
||||
"ca\tCatalan\n",
|
||||
"cs\tCzech\n",
|
||||
"cy\tWelsh\n",
|
||||
"da\tDanish\n",
|
||||
"de\tGerman\n",
|
||||
"el\tGreek\n",
|
||||
"en\tEnglish\n",
|
||||
"es\tSpanish\n",
|
||||
"et\tEstonian\n",
|
||||
"eu\tBasque\n",
|
||||
"fi\tFinnish\n",
|
||||
"fr\tFrench\n",
|
||||
"gl\tGalician\n",
|
||||
"gu\tGujarati\n",
|
||||
"ha\tHausa\n",
|
||||
"hi\tHindi\n",
|
||||
"hr\tCroatian\n",
|
||||
"hu\tHungarian\n",
|
||||
"id\tIndonesian\n",
|
||||
"is\tIcelandic\n",
|
||||
"it\tItalian\n",
|
||||
"iw\tHebrew\n",
|
||||
"ja\tJapanese\n",
|
||||
"jw\tJavanese\n",
|
||||
"km\tKhmer\n",
|
||||
"kn\tKannada\n",
|
||||
"ko\tKorean\n",
|
||||
"la\tLatin\n",
|
||||
"lt\tLithuanian\n",
|
||||
"lv\tLatvian\n",
|
||||
"ml\tMalayalam\n",
|
||||
"mr\tMarathi\n",
|
||||
"ms\tMalay\n",
|
||||
"my\tMyanmar (Burmese)\n",
|
||||
"ne\tNepali\n",
|
||||
"nl\tDutch\n",
|
||||
"no\tNorwegian\n",
|
||||
"pa\tPunjabi (Gurmukhi)\n",
|
||||
"pl\tPolish\n",
|
||||
"pt\tPortuguese (Brazil)\n",
|
||||
"pt-PT\tPortuguese (Portugal)\n",
|
||||
"ro\tRomanian\n",
|
||||
"ru\tRussian\n",
|
||||
"si\tSinhala\n",
|
||||
"sk\tSlovak\n",
|
||||
"sq\tAlbanian\n",
|
||||
"sr\tSerbian\n",
|
||||
"su\tSundanese\n",
|
||||
"sv\tSwedish\n",
|
||||
"sw\tSwahili\n",
|
||||
"ta\tTamil\n",
|
||||
"te\tTelugu\n",
|
||||
"th\tThai\n",
|
||||
"tl\tFilipino\n",
|
||||
"tr\tTurkish\n",
|
||||
"uk\tUkrainian\n",
|
||||
"ur\tUrdu\n",
|
||||
"vi\tVietnamese\n",
|
||||
"yue\tCantonese\n",
|
||||
"zh-CN\tChinese (Simplified)\n",
|
||||
"zh-TW\tChinese (Mandarin/Taiwan)\n",
|
||||
"zh\tChinese (Mandarin)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "General",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
Reference in New Issue
Block a user