Coverage for pyTooling / GenericPath / URL.py: 80%

172 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-28 12:48 +0000

1# ==================================================================================================================== # 

2# _____ _ _ ____ _ ____ _ _ # 

3# _ __ _ |_ _|__ ___ | (_)_ __ __ _ / ___| ___ _ __ ___ _ __(_) ___| _ \ __ _| |_| |__ # 

4# | '_ \| | | || |/ _ \ / _ \| | | '_ \ / _` || | _ / _ \ '_ \ / _ \ '__| |/ __| |_) / _` | __| '_ \ # 

5# | |_) | |_| || | (_) | (_) | | | | | | (_| || |_| | __/ | | | __/ | | | (__| __/ (_| | |_| | | | # 

6# | .__/ \__, ||_|\___/ \___/|_|_|_| |_|\__, (_)____|\___|_| |_|\___|_| |_|\___|_| \__,_|\__|_| |_| # 

7# |_| |___/ |___/ # 

8# ==================================================================================================================== # 

9# Authors: # 

10# Patrick Lehmann # 

11# # 

12# License: # 

13# ==================================================================================================================== # 

14# Copyright 2017-2025 Patrick Lehmann - Bötzingen, Germany # 

15# # 

16# Licensed under the Apache License, Version 2.0 (the "License"); # 

17# you may not use this file except in compliance with the License. # 

18# You may obtain a copy of the License at # 

19# # 

20# http://www.apache.org/licenses/LICENSE-2.0 # 

21# # 

22# Unless required by applicable law or agreed to in writing, software # 

23# distributed under the License is distributed on an "AS IS" BASIS, # 

24# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # 

25# See the License for the specific language governing permissions and # 

26# limitations under the License. # 

27# # 

28# SPDX-License-Identifier: Apache-2.0 # 

29# ==================================================================================================================== # 

30# 

31""" 

32This package provides a representation for a Uniform Resource Locator (URL). 

33 

34.. code-block:: 

35 

36 [schema://][user[:password]@]domain.tld[:port]/path/to/file[?query][#fragment] 

37""" 

38 

39from enum import IntFlag 

40from re import compile as re_compile 

41from typing import Dict, Optional as Nullable, Mapping 

42 

43try: 

44 from pyTooling.Decorators import export, readonly 

45 from pyTooling.Exceptions import ToolingException 

46 from pyTooling.Common import getFullyQualifiedName 

47 from pyTooling.GenericPath import RootMixIn, ElementMixIn, PathMixIn 

48except (ImportError, ModuleNotFoundError): # pragma: no cover 

49 print("[pyTooling.GenericPath.URL] Could not import from 'pyTooling.*'!") 

50 

51 try: 

52 from Decorators import export, readonly 

53 from Exceptions import ToolingException 

54 from Common import getFullyQualifiedName 

55 from GenericPath import RootMixIn, ElementMixIn, PathMixIn 

56 except (ImportError, ModuleNotFoundError) as ex: # pragma: no cover 

57 print("[pyTooling.GenericPath.URL] Could not import directly!") 

58 raise ex 

59 

60 

__all__ = [
    "URL_PATTERN",
    "URL_REGEXP",
]

#: Regular expression pattern for validating and splitting a URL.
URL_PATTERN = "".join((
    # Optional scheme, terminated by '://'.
    r"(?:(?P<scheme>\w+)://)?",
    # Optional credentials 'user[:password]', terminated by '@'.
    r"(?:(?P<user>[-a-zA-Z0-9_]+)(?::(?P<password>[-a-zA-Z0-9_]+))?@)?",
    # Optional host: dot-separated labels (a trailing dot is allowed) and an optional ':port'.
    r"(?:(?P<host>(?:[-a-zA-Z0-9_]+)(?:\.[-a-zA-Z0-9_]+)*\.?)(?:\:(?P<port>\d+))?)?",
    # Path: everything up to the first '?' or '#'; may be empty.
    r"(?P<path>[^?#]*?)",
    # Optional query introduced by '?', running up to the fragment.
    r"(?:\?(?P<query>[^#]+?))?",
    # Optional fragment introduced by '#'.
    r"(?:#(?P<fragment>.+?))?",
))

#: Precompiled regular expression for URL validation.
URL_REGEXP = re_compile(f"^{URL_PATTERN}$")

72 

73 

@export
class Protocols(IntFlag):
    """Bit flags naming the URL schemes supported by this module."""

    # One bit per protocol, so members can be combined with bitwise operators.
    TLS = 1 << 0    #: Transport Layer Security
    HTTP = 1 << 1   #: Hyper Text Transfer Protocol
    HTTPS = 1 << 2  #: SSL/TLS secured HTTP
    FTP = 1 << 3    #: File Transfer Protocol
    FTPS = 1 << 4   #: SSL/TLS secured FTP
    FILE = 1 << 5   #: Local files

84 

85 

@export
class Host(RootMixIn):
    """Host part of a URL: a hostname (DNS name or IP address) together with an optional port number."""

    _hostname: str        #: Name of the host (DNS name or IP address).
    _port: Nullable[int]  #: Optional port number.

    def __init__(self, hostname: str, port: Nullable[int] = None) -> None:
        """
        Create a host from a host name and an optional port number.

        :param hostname:    Name of the host (either IP address or DNS).
        :param port:        Port number; ``None`` when the host carries no explicit port.
        :raises TypeError:  When ``hostname`` is not a string or ``port`` is neither ``None`` nor an integer.
        :raises ValueError: When ``port`` lies outside of 0..65535.
        """
        super().__init__()

        if not isinstance(hostname, str):
            ex = TypeError("Parameter 'hostname' is not of type 'str'.")
            ex.add_note(f"Got type '{getFullyQualifiedName(hostname)}'.")
            raise ex

        self._hostname = hostname

        # A missing port is valid; only a given port is type- and range-checked.
        if port is not None:
            if not isinstance(port, int):
                ex = TypeError("Parameter 'port' is not of type 'int'.")
                ex.add_note(f"Got type '{getFullyQualifiedName(port)}'.")
                raise ex

            if port < 0 or port >= 65536:
                ex = ValueError("Parameter 'port' is out of range 0..65535.")
                ex.add_note(f"Got value '{port}'.")
                raise ex

        self._port = port

    @readonly
    def Hostname(self) -> str:
        """
        Read-only property returning the host name.

        :returns: Hostname as DNS name or IP address.
        """
        return self._hostname

    @readonly
    def Port(self) -> Nullable[int]:
        """
        Read-only property returning the port number, if any.

        :returns: Port number or ``None`` if no port was specified.
        """
        return self._port

    def __str__(self) -> str:
        """
        Format the host as ``hostname`` or ``hostname:port``.

        :returns: Host name, followed by ``:<port>`` only when a port is set.
        """
        if self._port is None:
            return self._hostname

        return f"{self._hostname}:{self._port}"

    def Copy(self) -> "Host":
        """
        Create a copy of this object.

        :return: A new instance of this object's class with the same host name and port.
        """
        hostClass = self.__class__
        return hostClass(self._hostname, self._port)

161 

162 

@export
class Element(ElementMixIn):
    """
    Path element type for the URL context.

    Derived from :class:`ElementMixIn` without adding any members of its own.
    """

166 

167 

@export
class Path(PathMixIn):
    """Represents a path in a URL."""

    ELEMENT_DELIMITER = "/"  #: Delimiter symbol in URLs between path elements.
    ROOT_DELIMITER = "/"     #: Delimiter symbol in URLs between root element and first path element.

    @classmethod
    def Parse(cls, path: str, root: Nullable[Host] = None) -> "Path":
        """
        Parse a path string into a path object.

        Delegates to the inherited ``Parse`` implementation, handing over this class and :class:`Element` as additional
        arguments.

        NOTE(review): presumably these two classes are used to instantiate the path and its elements - confirm against
        ``PathMixIn.Parse``.

        :param path: Path as string to be parsed.
        :param root: Optional host the path belongs to; passed through to the inherited implementation.
        :returns:    The result of the inherited ``Parse`` implementation.
        """
        return super().Parse(path, root, cls, Element)

178 

179 

@export
class URL:
    """
    Represents a URL (Uniform Resource Locator) including scheme, host, credentials, path, query and fragment.

    .. code-block::

       [schema://][user[:password]@]domain.tld[:port]/path/to/file[?query][#fragment]
    """

    _scheme: Nullable[Protocols]       #: Transport scheme; ``None`` if the URL has no scheme part.
    _user: Nullable[str]               #: Optional username.
    _password: Nullable[str]           #: Optional password.
    _host: Nullable[Host]              #: Optional host (hostname and port).
    _path: Path                        #: Path to the resource.
    _query: Nullable[Dict[str, str]]   #: Optional query as key-value pairs.
    _fragment: Nullable[str]           #: Optional fragment.

    def __init__(
        self,
        scheme: Nullable[Protocols],
        path: Path,
        host: Nullable[Host] = None,
        user: Nullable[str] = None,
        password: Nullable[str] = None,
        query: Nullable[Mapping[str, str]] = None,
        fragment: Nullable[str] = None
    ) -> None:
        """
        Initializes a Uniform Resource Locator (URL).

        :param scheme:     Transport scheme to be used for a specified resource; ``None`` for a URL without scheme.
        :param path:       Path to the resource.
        :param host:       Hostname where the resource is located.
        :param user:       Username for basic authentication.
        :param password:   Password for basic authentication.
        :param query:      An optional mapping of query parameters; its items are copied into a new dictionary.
        :param fragment:   An optional fragment.
        :raises TypeError: When a parameter is given, but isn't of the expected type.
        """
        if scheme is not None and not isinstance(scheme, Protocols):
            ex = TypeError("Parameter 'scheme' is not of type 'Protocols'.")
            ex.add_note(f"Got type '{getFullyQualifiedName(scheme)}'.")
            raise ex

        self._scheme = scheme

        if user is not None and not isinstance(user, str):
            ex = TypeError("Parameter 'user' is not of type 'str'.")
            ex.add_note(f"Got type '{getFullyQualifiedName(user)}'.")
            raise ex

        self._user = user

        if password is not None and not isinstance(password, str):
            ex = TypeError("Parameter 'password' is not of type 'str'.")
            ex.add_note(f"Got type '{getFullyQualifiedName(password)}'.")
            raise ex

        self._password = password

        if host is not None and not isinstance(host, Host):
            ex = TypeError("Parameter 'host' is not of type 'Host'.")
            ex.add_note(f"Got type '{getFullyQualifiedName(host)}'.")
            raise ex

        self._host = host

        if path is not None and not isinstance(path, Path):
            ex = TypeError("Parameter 'path' is not of type 'Path'.")
            ex.add_note(f"Got type '{getFullyQualifiedName(path)}'.")
            raise ex

        self._path = path

        if query is not None:
            if not isinstance(query, Mapping):
                ex = TypeError("Parameter 'query' is not a mapping ('dict', ...).")
                ex.add_note(f"Got type '{getFullyQualifiedName(query)}'.")
                raise ex

            # Copy the items, so later changes to the caller's mapping don't affect this URL.
            self._query = dict(query.items())
        else:
            self._query = None

        if fragment is not None and not isinstance(fragment, str):
            ex = TypeError("Parameter 'fragment' is not of type 'str'.")
            ex.add_note(f"Got type '{getFullyQualifiedName(fragment)}'.")
            raise ex

        self._fragment = fragment

    @readonly
    def Scheme(self) -> Nullable[Protocols]:
        """
        Read-only property to access the URL scheme.

        :returns: URL scheme of the URL or ``None`` if the URL has no scheme.
        """
        return self._scheme

    @readonly
    def User(self) -> Nullable[str]:
        """
        Read-only property to access the optional username.

        :returns: Optional username within the URL.
        """
        return self._user

    @readonly
    def Password(self) -> Nullable[str]:
        """
        Read-only property to access the optional password.

        :returns: Optional password within a URL.
        """
        return self._password

    @readonly
    def Host(self) -> Nullable[Host]:
        """
        Read-only property to access the host part (hostname and port number) of the URL.

        :returns: The host part of the URL.
        """
        return self._host

    @readonly
    def Path(self) -> Path:
        """
        Read-only property to access the path part of the URL.

        :returns: Path part of the URL.
        """
        return self._path

    @readonly
    def Query(self) -> Nullable[Dict[str, str]]:
        """
        Read-only property to access the dictionary of key-value pairs representing the query part in the URL.

        :returns: A dictionary representing the query as key-value pairs.
        """
        return self._query

    @readonly
    def Fragment(self) -> Nullable[str]:
        """
        Read-only property to access the fragment part of the URL.

        :returns: The fragment part of the URL.
        """
        return self._fragment

    @classmethod
    def Parse(cls, url: str) -> "URL":
        """
        Parse a URL string and returns the URL object.

        Example input: ``http://semaphore.plc2.de:5000/api/v1/semaphore?name=Riviera&foo=bar#page2``

        The query is split at ``&`` into pairs and each pair at its first ``=`` into key and value. Thus values may
        contain ``=`` characters, a key without ``=`` gets an empty string as value and empty pairs are skipped. If a
        key occurs multiple times, the last value wins. No percent-decoding is applied.

        :param url:               URL as string to be parsed.
        :returns:                 A URL object.
        :raises ToolingException: When syntax does not match.
        :raises KeyError:         When the scheme is not a member of :class:`Protocols`.
        :raises ValueError:       When the port number is rejected by :class:`Host` (out of range 0..65535).
        """
        matches = URL_REGEXP.match(url)
        if matches is None:
            raise ToolingException(f"Syntax error when parsing URL '{url}'.")

        scheme = matches.group("scheme")
        user = matches.group("user")
        password = matches.group("password")
        host = matches.group("host")
        port = matches.group("port")
        path = matches.group("path")
        query = matches.group("query")
        fragment = matches.group("fragment")

        if scheme is None:
            schemeObj = None
        else:
            try:
                schemeObj = Protocols[scheme.upper()]
            except KeyError as ex:
                # Keep the exception type, but tell the caller what was wrong.
                ex.add_note(f"Unsupported URL scheme '{scheme}'.")
                raise

        if host is None:
            hostObj = None
        else:
            hostObj = Host(host, None if port is None else int(port))

        pathObj = Path.Parse(path, hostObj)

        parameters = {}
        if query is not None:
            for pair in query.split("&"):
                # Tolerate empty pairs like in 'a=1&&b=2' or a trailing '&'.
                if not pair:
                    continue

                # Split at the first '=' only: values may contain '=' and keys may come without a value.
                key, _, value = pair.partition("=")
                parameters[key] = value

        return cls(
            schemeObj,
            pathObj,
            hostObj,
            user,
            password,
            parameters if parameters else None,
            fragment
        )

    def __str__(self) -> str:
        """
        Formats the URL object as a string representation.

        Query parameters are always written as ``key=value``, joined by ``&``.

        :returns: Formatted URL object.
        """
        result = str(self._path)

        if self._host is not None:
            result = str(self._host) + result

        if self._user is not None:
            if self._password is not None:
                result = f"{self._user}:{self._password}@{result}"
            else:
                result = f"{self._user}@{result}"

        if self._scheme is not None:
            result = self._scheme.name.lower() + "://" + result

        if self._query:
            result = result + "?" + "&".join(f"{key}={value}" for key, value in self._query.items())

        if self._fragment is not None:
            result = result + "#" + self._fragment

        return result

    def WithoutCredentials(self) -> "URL":
        """
        Returns a URL object without credentials (username and password).

        Scheme, path, host, query and fragment are handed over to the new object; username and password are omitted.

        :returns: New URL object without credentials.
        """
        return self.__class__(
            scheme=self._scheme,
            path=self._path,
            host=self._host,
            query=self._query,
            fragment=self._fragment
        )

420 )