Coverage for src/mkdocstrings/_cache.py: 25.49%

43 statements  

« prev     ^ index     » next       coverage.py v7.6.2, created at 2024-10-12 18:59 +0200

1import datetime 

2import gzip 

3import hashlib 

4import os 

5import urllib.parse 

6import urllib.request 

7from typing import BinaryIO, Callable 

8 

9import click 

10import platformdirs 

11 

12from mkdocstrings.loggers import get_logger 

13 

14log = get_logger(__name__) 

15 

16 

def download_url_with_gz(url: str) -> bytes:
    """Download the content at a URL, transparently decompressing gzip responses.

    The request advertises gzip support through the ``Accept-Encoding`` header.
    When the response declares a gzip ``Content-Encoding``, the body is
    decompressed before being returned; otherwise it is returned as received.

    Args:
        url: URL to download.

    Returns:
        The response body, decompressed if the server sent it gzip-encoded.
    """
    req = urllib.request.Request(  # noqa: S310
        url,
        headers={"Accept-Encoding": "gzip", "User-Agent": "mkdocstrings/0.15.0"},
    )
    with urllib.request.urlopen(req) as resp:  # noqa: S310
        # Content-coding names are case-insensitive, so normalize before matching.
        encoding = resp.headers.get("content-encoding", "").lower()
        if "gzip" not in encoding:
            return resp.read()
        # Close the decompressor deterministically; this does not close `resp`,
        # which is released by the enclosing `with`.
        with gzip.GzipFile(fileobj=resp) as unzipped:
            return unzipped.read()

27 

28 

29# This is mostly a copy of https://github.com/mkdocs/mkdocs/blob/master/mkdocs/utils/cache.py 

30# In the future maybe they can be deduplicated. 

31 

32 

def download_and_cache_url(
    url: str,
    download: Callable[[str], bytes],
    cache_duration: datetime.timedelta,
    comment: bytes = b"# ",
) -> bytes:
    """Downloads a file from the URL, stores it in the user cache directory, and returns its content.

    The file is stored in a ``mkdocstrings_url_cache`` folder inside the
    ``mkdocs`` user cache directory reported by ``platformdirs``. Its name is a
    truncated SHA-256 hash of the URL followed by the extension of the URL path.

    For tracking the age of the content, a prefix line is inserted into the stored file,
    rather than relying on mtime. The prefix line is not part of the returned content.

    Args:
        url: URL to use.
        download: Callback that will accept the URL and actually perform the download.
        cache_duration: How long to consider the URL content cached.
        comment: The appropriate comment prefix for this file format.

    Returns:
        The cached content if a sufficiently recent cache file exists,
        otherwise the freshly downloaded content.
    """
    directory = os.path.join(platformdirs.user_cache_dir("mkdocs"), "mkdocstrings_url_cache")
    name_hash = hashlib.sha256(url.encode()).hexdigest()[:32]
    # Take the extension from the URL *path* only: a query string or fragment
    # would otherwise leak characters such as "?" or ":" into the file name,
    # which some filesystems reject.
    extension = os.path.splitext(urllib.parse.urlsplit(url).path)[1]
    path = os.path.join(directory, name_hash + extension)

    now = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
    prefix = b"%s%s downloaded at timestamp " % (comment, url.encode())
    # Check for cached file and try to return it
    if os.path.isfile(path):
        try:
            with open(path, "rb") as f:
                line = f.readline()
                # The first line must match this exact URL; anything else is
                # treated as a cache miss and the file gets overwritten below.
                if line.startswith(prefix):
                    line = line[len(prefix) :]
                    timestamp = int(line)
                    if datetime.timedelta(seconds=(now - timestamp)) <= cache_duration:
                        log.debug(f"Using cached '{path}' for '{url}'")
                        # The header line was already consumed, so this is the payload only.
                        return f.read()
        except (OSError, ValueError) as e:
            # An unreadable or malformed cache file is not fatal: fall through and re-download.
            log.debug(f"{type(e).__name__}: {e}")

    # Download and cache the file
    log.debug(f"Downloading '{url}' to '{path}'")
    content = download(url)
    os.makedirs(directory, exist_ok=True)
    # Atomic write so a reader never observes a half-written cache file.
    with click.open_file(path, "wb", atomic=True) as f:
        f.write(b"%s%d\n" % (prefix, now))
        f.write(content)
    return content