Coverage for pyTooling/GenericPath/URL.py: 84%

163 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-25 22:22 +0000

1# ==================================================================================================================== # 

2# _____ _ _ ____ _ ____ _ _ # 

3# _ __ _ |_ _|__ ___ | (_)_ __ __ _ / ___| ___ _ __ ___ _ __(_) ___| _ \ __ _| |_| |__ # 

4# | '_ \| | | || |/ _ \ / _ \| | | '_ \ / _` || | _ / _ \ '_ \ / _ \ '__| |/ __| |_) / _` | __| '_ \ # 

5# | |_) | |_| || | (_) | (_) | | | | | | (_| || |_| | __/ | | | __/ | | | (__| __/ (_| | |_| | | | # 

6# | .__/ \__, ||_|\___/ \___/|_|_|_| |_|\__, (_)____|\___|_| |_|\___|_| |_|\___|_| \__,_|\__|_| |_| # 

7# |_| |___/ |___/ # 

8# ==================================================================================================================== # 

9# Authors: # 

10# Patrick Lehmann # 

11# # 

12# License: # 

13# ==================================================================================================================== # 

14# Copyright 2017-2025 Patrick Lehmann - Bötzingen, Germany # 

15# # 

16# Licensed under the Apache License, Version 2.0 (the "License"); # 

17# you may not use this file except in compliance with the License. # 

18# You may obtain a copy of the License at # 

19# # 

20# http://www.apache.org/licenses/LICENSE-2.0 # 

21# # 

22# Unless required by applicable law or agreed to in writing, software # 

23# distributed under the License is distributed on an "AS IS" BASIS, # 

24# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # 

25# See the License for the specific language governing permissions and # 

26# limitations under the License. # 

27# # 

28# SPDX-License-Identifier: Apache-2.0 # 

29# ==================================================================================================================== # 

30# 

31""" 

32This package provides a representation for a Uniform Resource Locator (URL). 

33 

34.. code-block:: 

35 

36 [scheme://][user[:password]@]domain.tld[:port]/path/to/file[?query][#fragment] 

37""" 

38from sys import version_info 

39 

40from enum import IntFlag 

41from re import compile as re_compile 

42from typing import Dict, Optional as Nullable, Mapping 

43 

44try: 

45 from pyTooling.Decorators import export, readonly 

46 from pyTooling.Exceptions import ToolingException 

47 from pyTooling.Common import getFullyQualifiedName 

48 from pyTooling.GenericPath import RootMixIn, ElementMixIn, PathMixIn 

49except (ImportError, ModuleNotFoundError): # pragma: no cover 

50 print("[pyTooling.GenericPath.URL] Could not import from 'pyTooling.*'!") 

51 

52 try: 

53 from Decorators import export, readonly 

54 from Exceptions import ToolingException 

55 from Common import getFullyQualifiedName 

56 from GenericPath import RootMixIn, ElementMixIn, PathMixIn 

57 except (ImportError, ModuleNotFoundError) as ex: # pragma: no cover 

58 print("[pyTooling.GenericPath.URL] Could not import directly!") 

59 raise ex 

60 

61 

__all__ = ["URL_PATTERN", "URL_REGEXP"]

# The pattern is built from one raw-string fragment per URL component. Every
# component is optional; the path fragment may match the empty string.
URL_PATTERN = (
    # Scheme: word characters followed by '://'.
    r"""(?:(?P<scheme>\w+)://)?"""
    # Credentials: user name, optionally ':password', terminated by '@'.
    r"""(?:(?P<user>[-a-zA-Z0-9_]+)(?::(?P<password>[-a-zA-Z0-9_]+))?@)?"""
    # Host: dot-separated labels with an optional trailing dot, optionally ':port'.
    r"""(?:(?P<host>(?:[-a-zA-Z0-9_]+)(?:\.[-a-zA-Z0-9_]+)*\.?)(?:\:(?P<port>\d+))?)?"""
    # Path: everything up to the first '?' or '#'.
    r"""(?P<path>[^?#]*?)"""
    # Query: text after '?' up to an optional '#'.
    r"""(?:\?(?P<query>[^#]+?))?"""
    # Fragment: text after '#'.
    r"""(?:#(?P<fragment>.+?))?"""
)  #: Regular expression pattern for validating and splitting a URL.
URL_REGEXP = re_compile(f"^{URL_PATTERN}$")  #: Precompiled regular expression for URL validation.

73 

74 

@export
class Protocols(IntFlag):
    """Flag enumeration of the URL schemes known to this module; each member occupies one bit."""

    TLS = 1 << 0    #: Transport Layer Security
    HTTP = 1 << 1   #: Hyper Text Transfer Protocol
    HTTPS = 1 << 2  #: SSL/TLS secured HTTP
    FTP = 1 << 3    #: File Transfer Protocol
    FTPS = 1 << 4   #: SSL/TLS secured FTP
    FILE = 1 << 5   #: Local files

85 

86 

87@export 

88class Host(RootMixIn): 

89 """Represents a host as either hostname, DNS or IP-address including the port number in a URL.""" 

90 

91 _hostname: str 

92 _port: Nullable[int] 

93 

94 def __init__( 

95 self, 

96 hostname: str, 

97 port: Nullable[int] = None 

98 ) -> None: 

99 """ 

100 Initialize a host instance described by host name and port number. 

101 

102 :param hostname: Name of the host (either IP or DNS). 

103 :param port: Port number. 

104 """ 

105 super().__init__() 

106 

107 if not isinstance(hostname, str): 107 ↛ 108line 107 didn't jump to line 108 because the condition on line 107 was never true

108 ex = TypeError(f"Parameter 'hostname' is not of type 'str'.") 

109 if version_info >= (3, 11): # pragma: no cover 

110 ex.add_note(f"Got type '{getFullyQualifiedName(hostname)}'.") 

111 raise ex 

112 self._hostname = hostname 

113 

114 if port is None: 

115 pass 

116 elif not isinstance(port, int): 116 ↛ 117line 116 didn't jump to line 117 because the condition on line 116 was never true

117 ex = TypeError(f"Parameter 'port' is not of type 'int'.") 

118 if version_info >= (3, 11): # pragma: no cover 

119 ex.add_note(f"Got type '{getFullyQualifiedName(hostname)}'.") 

120 raise ex 

121 elif not (0 <= port < 65536): 121 ↛ 122line 121 didn't jump to line 122 because the condition on line 121 was never true

122 ex = ValueError(f"Parameter 'port' is out of range 0..65535.") 

123 if version_info >= (3, 11): # pragma: no cover 

124 ex.add_note(f"Got value '{port}'.") 

125 raise ex 

126 self._port = port 

127 

128 @readonly 

129 def Hostname(self) -> str: 

130 """Hostname or IP address as string.""" 

131 return self._hostname 

132 

133 @readonly 

134 def Port(self) -> Nullable[int]: 

135 """Port number as integer.""" 

136 return self._port 

137 

138 def __str__(self) -> str: 

139 result = self._hostname 

140 if self._port is not None: 

141 result += f":{self._port}" 

142 

143 return result 

144 

145 def Copy(self) -> "Host": 

146 """ 

147 Create a copy of this object. 

148 

149 :return: A new Host instance. 

150 """ 

151 return self.__class__( 

152 self._hostname, 

153 self._port 

154 ) 

155 

156 

@export
class Element(ElementMixIn):
    """Path element type used for paths inside a URL; adds nothing to :class:`ElementMixIn`."""

160 

161 

@export
class Path(PathMixIn):
    """Path component of a URL, using ``/`` as delimiter."""

    ELEMENT_DELIMITER = "/"  #: Delimiter symbol in URLs between path elements.
    ROOT_DELIMITER = "/"     #: Delimiter symbol in URLs between root element and first path element.

    @classmethod
    def Parse(cls, path: str, root: Nullable[Host] = None) -> "Path":
        """
        Parse a path string into a path object.

        The work is delegated to the inherited ``Parse`` implementation, which receives this class and
        :class:`Element` in addition to the given arguments.

        :param path: Path as string to be parsed.
        :param root: Optional host used as root of the path.
        :return:     Result of the inherited ``Parse`` call.
        """
        parent = super()
        return parent.Parse(path, root, cls, Element)

172 

173 

@export
class URL:
    """
    Represents a URL (Uniform Resource Locator) including scheme, host, credentials, path, query and fragment.

    .. code-block::

       [scheme://][user[:password]@]domain.tld[:port]/path/to/file[?query][#fragment]
    """

    _scheme: Nullable[Protocols]       # Transport scheme, or None if the URL has no scheme.
    _user: Nullable[str]               # User name part of the credentials.
    _password: Nullable[str]           # Password part of the credentials.
    _host: Nullable[Host]              # Host name and port number.
    _path: Path                        # Path to the resource.
    _query: Nullable[Dict[str, str]]   # Private copy of the query parameters.
    _fragment: Nullable[str]           # Fragment following '#'.

    def __init__(
        self,
        scheme: Nullable[Protocols],
        path: Path,
        host: Nullable[Host] = None,
        user: Nullable[str] = None,
        password: Nullable[str] = None,
        query: Nullable[Mapping[str, str]] = None,
        fragment: Nullable[str] = None
    ) -> None:
        """
        Initializes a Uniform Resource Locator (URL).

        :param scheme:     Transport scheme to be used for a specified resource.
        :param path:       Path to the resource.
        :param host:       Hostname where the resource is located.
        :param user:       Username for basic authentication.
        :param password:   Password for basic authentication.
        :param query:      An optional mapping of query parameters; its items are copied.
        :param fragment:   An optional fragment.
        :raises TypeError: If a parameter is not None and not of its expected type.
        """
        if scheme is not None and not isinstance(scheme, Protocols):
            ex = TypeError("Parameter 'scheme' is not of type 'Protocols'.")
            if version_info >= (3, 11):  # pragma: no cover
                ex.add_note(f"Got type '{getFullyQualifiedName(scheme)}'.")
            raise ex
        self._scheme = scheme

        if user is not None and not isinstance(user, str):
            ex = TypeError("Parameter 'user' is not of type 'str'.")
            if version_info >= (3, 11):  # pragma: no cover
                ex.add_note(f"Got type '{getFullyQualifiedName(user)}'.")
            raise ex
        self._user = user

        if password is not None and not isinstance(password, str):
            ex = TypeError("Parameter 'password' is not of type 'str'.")
            if version_info >= (3, 11):  # pragma: no cover
                ex.add_note(f"Got type '{getFullyQualifiedName(password)}'.")
            raise ex
        self._password = password

        if host is not None and not isinstance(host, Host):
            ex = TypeError("Parameter 'host' is not of type 'Host'.")
            if version_info >= (3, 11):  # pragma: no cover
                ex.add_note(f"Got type '{getFullyQualifiedName(host)}'.")
            raise ex
        self._host = host

        if path is not None and not isinstance(path, Path):
            ex = TypeError("Parameter 'path' is not of type 'Path'.")
            if version_info >= (3, 11):  # pragma: no cover
                ex.add_note(f"Got type '{getFullyQualifiedName(path)}'.")
            raise ex
        self._path = path

        if query is not None:
            if not isinstance(query, Mapping):
                ex = TypeError("Parameter 'query' is not a mapping ('dict', ...).")
                if version_info >= (3, 11):  # pragma: no cover
                    ex.add_note(f"Got type '{getFullyQualifiedName(query)}'.")
                raise ex

            # Store a private copy, so later changes to the caller's mapping don't affect this URL.
            self._query = dict(query)
        else:
            self._query = None

        if fragment is not None and not isinstance(fragment, str):
            ex = TypeError("Parameter 'fragment' is not of type 'str'.")
            if version_info >= (3, 11):  # pragma: no cover
                ex.add_note(f"Got type '{getFullyQualifiedName(fragment)}'.")
            raise ex
        self._fragment = fragment

    @readonly
    def Scheme(self) -> Nullable[Protocols]:
        """
        Returns the scheme of the URL.

        :return: The scheme of the URL, or ``None`` if no scheme was given.
        """
        return self._scheme

    @readonly
    def User(self) -> Nullable[str]:
        """
        Returns the user name part of the URL's credentials.

        :return: The user name, or ``None`` if no user name was given.
        """
        return self._user

    @readonly
    def Password(self) -> Nullable[str]:
        """
        Returns the password part of the URL's credentials.

        :return: The password, or ``None`` if no password was given.
        """
        return self._password

    @readonly
    def Host(self) -> Nullable[Host]:
        """
        Returns the host part (host name and port number) of the URL.

        :return: The host part of the URL.
        """
        return self._host

    @readonly
    def Path(self) -> Path:
        """
        Returns the path part of the URL.

        :return: The path part of the URL.
        """
        return self._path

    @readonly
    def Query(self) -> Nullable[Dict[str, str]]:
        """
        Returns a dictionary of key-value pairs representing the query part in a URL.

        :returns: A dictionary representing the query.
        """
        return self._query

    @readonly
    def Fragment(self) -> Nullable[str]:
        """
        Returns the fragment part of the URL.

        :return: The fragment part of the URL.
        """
        return self._fragment

    # http://semaphore.plc2.de:5000/api/v1/semaphore?name=Riviera&foo=bar#page2
    @classmethod
    def Parse(cls, url: str) -> "URL":
        """
        Parse a URL string and returns a URL object.

        Query parameters are split at ``&`` and each pair at its first ``=``. Thus a value may itself contain ``=``
        and a parameter without ``=`` is stored with an empty string as value. Empty pairs (e.g. caused by ``&&``
        or a trailing ``&``) are ignored.

        :param url:               URL as string to be parsed.
        :returns:                 A URL object.
        :raises ToolingException: When syntax does not match.
        :raises KeyError:         When the scheme is not a member of :class:`Protocols`.
        :raises ValueError:       When the port number is outside of range 0..65535 (raised by :class:`Host`).
        """
        matches = URL_REGEXP.match(url)
        if matches is None:
            raise ToolingException(f"Syntax error when parsing URL '{url}'.")

        scheme = matches.group("scheme")
        user = matches.group("user")
        password = matches.group("password")
        host = matches.group("host")

        port = matches.group("port")
        if port is not None:
            port = int(port)
        path = matches.group("path")
        query = matches.group("query")
        fragment = matches.group("fragment")

        # Scheme names are looked up case-insensitively by enum member name.
        scheme = None if scheme is None else Protocols[scheme.upper()]
        hostObj = None if host is None else Host(host, port)

        pathObj = Path.Parse(path, hostObj)

        parameters = {}
        if query is not None:
            for pair in query.split("&"):
                if not pair:
                    continue

                # Split at the first '=' only, so values like 'abc==' stay intact.
                key, _, value = pair.partition("=")
                parameters[key] = value

        return cls(
            scheme,
            pathObj,
            hostObj,
            user,
            password,
            parameters if parameters else None,
            fragment
        )

    def __str__(self) -> str:
        """
        Formats the URL object as a string representation.

        :return: Formatted URL object.
        """
        result = str(self._path)

        if self._host is not None:
            result = str(self._host) + result

        # A password is only emitted together with a user name.
        if self._user is not None:
            if self._password is not None:
                result = f"{self._user}:{self._password}@{result}"
            else:
                result = f"{self._user}@{result}"

        if self._scheme is not None:
            result = self._scheme.name.lower() + "://" + result

        if self._query is not None and len(self._query) > 0:
            result = result + "?" + "&".join(f"{key}={value}" for key, value in self._query.items())

        if self._fragment is not None:
            result = result + "#" + self._fragment

        return result

    def WithoutCredentials(self) -> "URL":
        """
        Returns a URL object without credentials (username and password).

        :return: New URL object without credentials.
        """
        return self.__class__(
            scheme=self._scheme,
            path=self._path,
            host=self._host,
            query=self._query,
            fragment=self._fragment
        )
396 )