Skip to content

Commit

Permalink
Use lru_cache on Wtp.get_page()
Browse files Browse the repository at this point in the history
`get_page()` is kind of slow (1s per call); caching the requests improves
performance significantly. This reduces the processing time of the Chinese
Wiktionary from 40 minutes to 10 minutes.
  • Loading branch information
xxyzz committed Aug 21, 2023
1 parent 4edfb17 commit 4b3d963
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
7 changes: 6 additions & 1 deletion wikitextprocessor/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import urllib.parse
from collections.abc import Sequence
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path
from types import TracebackType
from typing import (
Expand Down Expand Up @@ -1683,6 +1684,7 @@ def magic_repl(m: re.Match) -> str:
# print(" _finalize_expand:{!r}".format(text))
return text

@lru_cache
def get_page(
self, title: str, namespace_id: Optional[int] = None
) -> Optional[Page]:
Expand Down Expand Up @@ -1710,7 +1712,10 @@ def get_page(
# Add namespace prefix
title = ns_prefix + title

query_str = "SELECT * FROM pages WHERE title = ?"
query_str = """
SELECT title, namespace_id, redirect_to, need_pre_expand, body, model
FROM pages WHERE title = ?
"""
if namespace_id is not None:
query_str += " AND namespace_id = ?"
query_str += " LIMIT 1"
Expand Down
2 changes: 1 addition & 1 deletion wikitextprocessor/luaexec.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ def get_page_info(ctx: "Wtp", title: str, namespace_id: int) -> "_LuaTable":
assert ctx.lua is not None

page_id = 0 # XXX collect required info in phase 1
page: Optional["Page"] = ctx.get_page(title, namespace_id)
page = ctx.get_page(title, namespace_id)
# whether the page exists and what its id might be
dt = {
"id": page_id,
Expand Down

0 comments on commit 4b3d963

Please sign in to comment.