cx_libtranslate_searcher.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538
  1. #!/usr/bin/env python3
  2. import pathlib
  3. import typing
  4. class file:
  5. """ This class parse single file, and returns all phrases to translate.
  6. This open file, and trying to find all phrases to translate in the file.
  7. Process is separated to few steps, first run filter() to get all lines
  8. when translate functions exists. Then run parse(), which would get
  9. phrases and append it to list. On the end, could get results as list by
  10. result() function, which return list with all phrases, or phrases()
  11. generator, which iterate on all of them.
  12. """
  13. def __init__(self, target: pathlib.Path) -> None:
  14. """ It creates new file file parser.
  15. Parameters
  16. ----------
  17. target : pathlib.Path
  18. It is path to the file in the filesystem to parse.
  19. Raises
  20. ------
  21. TypeError
  22. When target is not afile or not exists.
  23. """
  24. if not target.is_file():
  25. raise TypeError("Target \"" + str(target) + "\" not exists.")
  26. self.__to_parse = None
  27. self.__phrases = None
  28. with target.open() as handle:
  29. self.__content = handle.read().split("\n")
  30. def __targets(self) -> list:
  31. """ This return targets function which libtranslate use to translates.
  32. Returns
  33. -------
  34. list
  35. List with targets to search for in the code.
  36. """
  37. return [ "_(", ".translate(", ".tr(", ".phrase ", ".phrase=" ]
  38. def __string(self) -> list:
  39. """ This returns javascript string openers and closers.
  40. Returns
  41. -------
  42. list
  43. List chars with open and close strings in js.
  44. """
  45. return [ '"', "'", "`" ]
  46. def __has_phrase(self, line: str) -> bool:
  47. """ This check that given line of code has translation instructions.
  48. Parameters
  49. ----------
  50. line : str
  51. Line of code to search for instructions in.
  52. Returns
  53. -------
  54. bool
  55. True when line has translations or False when not.
  56. """
  57. for target in self.__targets():
  58. if line.find(target) != -1:
  59. return True
  60. return False
  61. def filter(self) -> object:
  62. """ This filter file contents for lines with translation content.
  63. This search for translate instructions in all file content. When
  64. line contain translation instructions, then it append it to new
  65. list, which would be processed in the next step.
  66. Returns
  67. -------
  68. file
  69. Own instanct for chain loading.
  70. """
  71. self.__to_parse = list()
  72. for line in self.__content:
  73. if self.__has_phrase(line):
  74. self.__to_parse.append(line)
  75. return self
  76. def __get_next(self, line: str, start: int) -> int:
  77. """ This return next speech of the translation instruction.
  78. This function trying to find next speech of any from translation
  79. instructions. When it found, then return position of the end of
  80. that function, but when could not found anything, then return -1.
  81. Parameters
  82. ----------
  83. line : str
  84. Line to found instruction in.
  85. start : int
  86. This is position when start to searching for.
  87. Returns
  88. -------
  89. int
  90. Position of the end of found instruction or -1 when not found.
  91. """
  92. result = -1
  93. for target in self.__targets():
  94. position = line.find(target, start)
  95. if position == -1:
  96. continue
  97. position = position + len(target)
  98. if result == -1:
  99. result = position
  100. continue
  101. if result > position:
  102. result = position
  103. return result
  104. def __get_string_pos(self, line: str, start: int) -> int:
  105. """ This return next position of the string in the line.
  106. This trying to search string in the line, when found it, then return
  107. it position, or return -1 when not found anything. It return position
  108. of the start string char, in opposition to __get_next.
  109. Parameters
  110. ----------
  111. line : str
  112. Line to search for string in.
  113. start : int
  114. Position to start searching in.
  115. Returns
  116. -------
  117. int
  118. Position of the string, or -1.
  119. """
  120. for target in self.__string():
  121. position = line.find(target, start)
  122. if position == -1:
  123. continue
  124. return position
  125. return -1
  126. def __cut_text(self, line: str, position: int) -> str|None:
  127. """ This get string from given line next to given position.
  128. This trying to find string next to given position. When found it,
  129. then get it, and return its content. When not found anything, then
  130. return None.
  131. Parameters
  132. ----------
  133. line : str
  134. Line to search for string in.
  135. position : int
  136. Position to search for string on.
  137. Returns
  138. -------
  139. str
  140. Content of the found string.
  141. None
  142. When not found any string.
  143. """
  144. start = self.__get_string_pos(line, position)
  145. if start == -1:
  146. return None
  147. char = line[start]
  148. start = start + 1
  149. end = line.find(char, start)
  150. if end == -1:
  151. return None
  152. return line[start:end]
  153. def __parse_line(self, line: str) -> typing.Iterator[str]:
  154. """ This parse single line, and return generator with found phrases.
  155. Parameters
  156. ----------
  157. line : str
  158. Line to parse.
  159. Returns
  160. -------
  161. Iterator[str]
  162. All found phrases in the line.
  163. """
  164. current = 0
  165. while True:
  166. current = self.__get_next(line, current)
  167. if current == -1:
  168. break
  169. text = self.__cut_text(line, current)
  170. if text is None:
  171. continue
  172. current = current + len(text)
  173. yield text
  174. def parse(self) -> object:
  175. """ This parse all lines in the file.
  176. Returns
  177. -------
  178. file
  179. Own instance to chain loading.
  180. """
  181. if self.__to_parse is None:
  182. raise RuntimeError("Run filter() first.")
  183. self.__phrases = list()
  184. for line in self.__to_parse:
  185. for count in self.__parse_line(line):
  186. if not count in self.__phrases:
  187. self.__phrases.append(count)
  188. return self
  189. def result(self) -> list:
  190. """ This return all founded phrases as list.
  191. Raises
  192. ------
  193. RuntimeError
  194. When not run parse() before.
  195. Returns
  196. -------
  197. list
  198. List with all found phrases.
  199. """
  200. if self.__phrases is None:
  201. raise RuntimeError("Run parse() first.")
  202. return self.__phrases
  203. def phrases(self) ->typing.Iterator[str]:
  204. """ This returns generator with all phrases.
  205. Raises
  206. ------
  207. RuntimeError
  208. When not run parse() before.
  209. Returns
  210. -------
  211. Iterator[str]
  212. Generator with all phrases.
  213. """
  214. for phrase in self.result():
  215. yield phrase
  216. class directory:
  217. """ This open all Javascript files in the directory and search phrases.
  218. This trying to open all Javascript files and all Javascript in the
  219. subdirectories. Then parsing all that files, and adding phrases from
  220. its to the list. On the end that phrases could be returned as list, or
  221. loaded from Iterator.
  222. """
  223. def __init__(self, target: pathlib.Path) -> None:
  224. """ This create new directory instance.
  225. Raises
  226. ------
  227. TypeError
  228. When target path is not directory.
  229. Parameters
  230. ----------
  231. target : pathlib.Path
  232. Directory to work in.
  233. """
  234. if not target.is_dir():
  235. raise TypeError("Target \"" + str(target) + "\" is not dir.")
  236. self.__target = target
  237. self.__phrases = list()
  238. def __append(self, phrases: list) -> None:
  239. """ This append new phrase to phrases list.
  240. Parameters
  241. ----------
  242. phrases : list
  243. List of phrases to add.
  244. """
  245. for phrase in phrases:
  246. if phrase in self.__phrases:
  247. continue
  248. self.__phrases.append(phrase)
  249. def process(self) -> object:
  250. """ This process given directory.
  251. Returns
  252. -------
  253. directory
  254. Own instance to chain loading.
  255. """
  256. self.__process_directory(self.__target)
  257. return self
  258. def result(self) -> list:
  259. """ This return list with result.
  260. Returns
  261. -------
  262. list
  263. List with phrases from the files.
  264. """
  265. return self.__phrases
  266. def phrases(self) -> typing.Iterator[str]:
  267. """ This return all phrases as iterator.
  268. Returns
  269. -------
  270. Iterator[str]
  271. All phrases from files.
  272. """
  273. for phrase in self.__phrases:
  274. yield phrase
  275. def __js_extensions(self) -> list:
  276. """ This return all extensions for js files.
  277. Returns
  278. -------
  279. list
  280. All js files extensions.
  281. """
  282. return [ "js", "mjs", "ts" ]
  283. def __process_directory(self, directory: pathlib.Path) -> None:
  284. """ This process given directory.
  285. This process given directory, when in directory exists any diretory,
  286. then it would be processed by that function recursive. When found
  287. file, then parse it, and add phrases from it to the list.
  288. Parameters
  289. ----------
  290. directory : pathlib.Path
  291. Directory to work on
  292. """
  293. for count in directory.iterdir():
  294. if count.is_dir():
  295. self.__process_directory(count)
  296. continue
  297. if count.is_file():
  298. self.__process_file(count)
  299. continue
  300. def __process_file(self, target: pathlib.Path) -> None:
  301. """ This process single file.
  302. This process single file. When file is not Javasocript file, then ti
  303. skip it, but when file is Javascript source code, then it trying to
  304. extract all phrases from it and adds it to phrases list.
  305. Parameters
  306. ----------
  307. target : pathlib.Path
  308. Target file to process.
  309. """
  310. suffix = target.suffix[1:]
  311. if not suffix in self.__js_extensions():
  312. return
  313. self.__append(file(target).filter().parse().result())
  314. class dictionary:
  315. """ This create new sample dictionary with phrases.
  316. This class create sample dictionary file from phrases list.
  317. It could return result as string, or save it to file.
  318. """
  319. def __init__(self, phrases: list) -> None:
  320. """ This create new dictionary instance.
  321. Parameters
  322. ----------
  323. phrases : list
  324. List of phrases to prepare dictionary from.
  325. """
  326. self.__phrases = phrases
  327. def __process_single_phrase(self, phrase: str) -> str:
  328. """ This process single phrase to line in the dictionary.
  329. Parameters
  330. ----------
  331. phrase : str
  332. Phrase to make line from.
  333. Returns
  334. -------
  335. str
  336. Phrase as line in dictionary.
  337. """
  338. return (" " * 4) + "\"" + phrase + "\": \"\""
  339. @property
  340. def result(self) -> str:
  341. """ It process all phrases to the dictionary file.
  342. Returns
  343. -------
  344. str
  345. Parsed dictionary.
  346. """
  347. lines = []
  348. start = "{\n"
  349. stop = "\n}\n"
  350. for phrase in self.__phrases:
  351. lines.append(self.__process_single_phrase(phrase))
  352. return start + str(",\n").join(lines) + stop
  353. def write(self, target: pathlib.Path) -> None:
  354. """ It write dictionary to the file.
  355. Parameters
  356. ----------
  357. target : pathlib.Path
  358. Target file to write dictionary into.
  359. """
  360. with target.open("w+") as handle:
  361. handle.write(self.result)
  362. if __name__ == "__main__":
  363. import argparse
  364. parser = argparse.ArgumentParser(
  365. description = "This script helps to create phrasebook from scripts."
  366. )
  367. parser.add_argument(
  368. "source",
  369. type = pathlib.Path,
  370. help = "This is source file or directory to parse from."
  371. )
  372. parser.add_argument(
  373. "--output",
  374. type = pathlib.Path,
  375. default = "output.json",
  376. help = "This is output phrasebook file, to save into."
  377. )
  378. arguments = parser.parse_args()
  379. if not arguments.source.exists():
  380. print("Source \"" + str(arguments.source) + "\" not exists.")
  381. exit(127)
  382. if arguments.source.is_file():
  383. target = file(arguments.source).filter().parse().result()
  384. elif arguments.source.is_dir():
  385. target = directory(arguments.source).process().result()
  386. else:
  387. print("Source is not file or directory.")
  388. exit(126)
  389. try:
  390. dictionary(target).write(arguments.output)
  391. print("Processed successfull.")
  392. print("Processed: " + str(len(target)) + " phrases.")
  393. except Exception as error:
  394. print(str(error))
  395. print("Can not save \"" + str(arguments.output) + "\" output file.")
  396. exit(125)