| 1 # Copyright (C) 2001-2007 Python Software Foundation 2 # Author: Barry Warsaw 3 # Contact: email-sig@python.org 4 5 """Basic message object for the email package object model.""" 6 7 __all__ = ['Message'] 8 9 import re 10 import uu 11 from io import BytesIO, StringIO 12 13 # Intrapackage imports 14 from email import utils 15 from email import errors 16 from email._policybase import compat32 17 from email import charset as _charset 18 from email._encoded_words import decode_b 19 Charset = _charset.Charset 20 21 SEMISPACE = '; ' 22 23 # Regular expression that matches `special' characters in parameters, the 24 # existence of which force quoting of the parameter value. 25 tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') 26 27 28 def _splitparam(param): 29 # Split header parameters. BAW: this may be too simple. It isn't 30 # strictly RFC 2045 (section 5.1) compliant, but it catches most headers 31 # found in the wild. We may eventually need a full fledged parser. 32 # RDM: we might have a Header here; for now just stringify it. 33 a, sep, b = str(param).partition(';') 34 if not sep: 35 return a.strip(), None 36 return a.strip(), b.strip() 37 38 def _formatparam(param, value=None, quote=True): 39 """Convenience function to format and return a key=value pair. 40 41 This will quote the value if needed or if quote is true. If value is a 42 three tuple (charset, language, value), it will be encoded according 43 to RFC2231 rules. If it contains non-ascii characters it will likewise 44 be encoded according to RFC2231 rules, using the utf-8 charset and 45 a null language. 46 """ 47 if value is not None and len(value) > 0: 48 # A tuple is used for RFC 2231 encoded parameter values where items 49 # are (charset, language, value). charset is a string, not a Charset 50 # instance. RFC 2231 encoded values are never quoted, per RFC. 51 if isinstance(value, tuple): 52 # Encode as per RFC 2231 53 param += '*' 54 value = utils.encode_rfc2231(value[2], value[0], value[1]) 55 return '%s=%s' % (param, value) 56 else: 57 try: 58 value.encode('ascii') 59 except UnicodeEncodeError: 60 param += '*' 61 value = utils.encode_rfc2231(value, 'utf-8', '') 62 return '%s=%s' % (param, value) 63 # BAW: Please check this. I think that if quote is set it should 64 # force quoting even if not necessary. 65 if quote or tspecials.search(value): 66 return '%s="%s"' % (param, utils.quote(value)) 67 else: 68 return '%s=%s' % (param, value) 69 else: 70 return param 71 72 def _parseparam(s): 73 # RDM This might be a Header, so for now stringify it. 74 s = ';' + str(s) 75 plist = [] 76 while s[:1] == ';': 77 s = s[1:] 78 end = s.find(';') 79 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: 80 end = s.find(';', end + 1) 81 if end < 0: 82 end = len(s) 83 f = s[:end] 84 if '=' in f: 85 i = f.index('=') 86 f = f[:i].strip().lower() + '=' + f[i+1:].strip() 87 plist.append(f.strip()) 88 s = s[end:] 89 return plist 90 91 92 def _unquotevalue(value): 93 # This is different than utils.collapse_rfc2231_value() because it doesn't 94 # try to convert the value to a unicode. Message.get_param() and 95 # Message.get_params() are both currently defined to return the tuple in 96 # the face of RFC 2231 parameters. 97 if isinstance(value, tuple): 98 return value[0], value[1], utils.unquote(value[2]) 99 else: 100 return utils.unquote(value) 101 102 103 104 class Message: 105 """Basic message object. 106 107 A message object is defined as something that has a bunch of RFC 2822 108 headers and a payload. It may optionally have an envelope header 109 (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a 110 multipart or a message/rfc822), then the payload is a list of Message 111 objects, otherwise it is a string. 112 113 Message objects implement part of the `mapping' interface, which assumes 114 there is exactly one occurrence of the header per message. Some headers 115 do in fact appear multiple times (e.g. Received) and for those headers, 116 you must use the explicit API to set or get all the headers. Not all of 117 the mapping methods are implemented. 118 """ 119 def __init__(self, policy=compat32): 120 self.policy = policy 121 self._headers = [] 122 self._unixfrom = None 123 self._payload = None 124 self._charset = None 125 # Defaults for multipart messages 126 self.preamble = self.epilogue = None 127 self.defects = [] 128 # Default content type 129 self._default_type = 'text/plain' 130 131 def __str__(self): 132 """Return the entire formatted message as a string. 133 """ 134 return self.as_string() 135 136 def as_string(self, unixfrom=False, maxheaderlen=0, policy=None): 137 """Return the entire formatted message as a string. 138 139 Optional 'unixfrom', when true, means include the Unix From_ envelope 140 header. For backward compatibility reasons, if maxheaderlen is 141 not specified it defaults to 0, so you must override it explicitly 142 if you want a different maxheaderlen. 'policy' is passed to the 143 Generator instance used to serialize the mesasge; if it is not 144 specified the policy associated with the message instance is used. 145 146 If the message object contains binary data that is not encoded 147 according to RFC standards, the non-compliant data will be replaced by 148 unicode "unknown character" code points. 149 """ 150 from email.generator import Generator 151 policy = self.policy if policy is None else policy 152 fp = StringIO() 153 g = Generator(fp, 154 mangle_from_=False, 155 maxheaderlen=maxheaderlen, 156 policy=policy) 157 g.flatten(self, unixfrom=unixfrom) 158 return fp.getvalue() 159 160 def __bytes__(self): 161 """Return the entire formatted message as a bytes object. 162 """ 163 return self.as_bytes() 164 165 def as_bytes(self, unixfrom=False, policy=None): 166 """Return the entire formatted message as a bytes object. 167 168 Optional 'unixfrom', when true, means include the Unix From_ envelope 169 header. 'policy' is passed to the BytesGenerator instance used to 170 serialize the message; if not specified the policy associated with 171 the message instance is used. 172 """ 173 from email.generator import BytesGenerator 174 policy = self.policy if policy is None else policy 175 fp = BytesIO() 176 g = BytesGenerator(fp, mangle_from_=False, policy=policy) 177 g.flatten(self, unixfrom=unixfrom) 178 return fp.getvalue() 179 180 def is_multipart(self): 181 """Return True if the message consists of multiple parts.""" 182 return isinstance(self._payload, list) 183 184 # 185 # Unix From_ line 186 # 187 def set_unixfrom(self, unixfrom): 188 self._unixfrom = unixfrom 189 190 def get_unixfrom(self): 191 return self._unixfrom 192 193 # 194 # Payload manipulation. 195 # 196 def attach(self, payload): 197 """Add the given payload to the current payload. 198 199 The current payload will always be a list of objects after this method 200 is called. If you want to set the payload to a scalar object, use 201 set_payload() instead. 202 """ 203 if self._payload is None: 204 self._payload = [payload] 205 else: 206 self._payload.append(payload) 207 208 def get_payload(self, i=None, decode=False): 209 """Return a reference to the payload. 210 211 The payload will either be a list object or a string. If you mutate 212 the list object, you modify the message's payload in place. Optional 213 i returns that index into the payload. 214 215 Optional decode is a flag indicating whether the payload should be 216 decoded or not, according to the Content-Transfer-Encoding header 217 (default is False). 218 219 When True and the message is not a multipart, the payload will be 220 decoded if this header's value is `quoted-printable' or `base64'. If 221 some other encoding is used, or the header is missing, or if the 222 payload has bogus data (i.e. bogus base64 or uuencoded data), the 223 payload is returned as-is. 224 225 If the message is a multipart and the decode flag is True, then None 226 is returned. 227 """ 228 # Here is the logic table for this code, based on the email5.0.0 code: 229 # i decode is_multipart result 230 # ------ ------ ------------ ------------------------------ 231 # None True True None 232 # i True True None 233 # None False True _payload (a list) 234 # i False True _payload element i (a Message) 235 # i False False error (not a list) 236 # i True False error (not a list) 237 # None False False _payload 238 # None True False _payload decoded (bytes) 239 # Note that Barry planned to factor out the 'decode' case, but that 240 # isn't so easy now that we handle the 8 bit data, which needs to be 241 # converted in both the decode and non-decode path. 242 if self.is_multipart(): 243 if decode: 244 return None 245 if i is None: 246 return self._payload 247 else: 248 return self._payload[i] 249 # For backward compatibility, Use isinstance and this error message 250 # instead of the more logical is_multipart test. 251 if i is not None and not isinstance(self._payload, list): 252 raise TypeError('Expected list, got %s' % type(self._payload)) 253 payload = self._payload 254 # cte might be a Header, so for now stringify it. 255 cte = str(self.get('content-transfer-encoding', '')).lower() 256 # payload may be bytes here. 257 if isinstance(payload, str): 258 if utils._has_surrogates(payload): 259 bpayload = payload.encode('ascii', 'surrogateescape') 260 if not decode: 261 try: 262 payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') 263 except LookupError: 264 payload = bpayload.decode('ascii', 'replace') 265 elif decode: 266 try: 267 bpayload = payload.encode('ascii') 268 except UnicodeError: 269 # This won't happen for RFC compliant messages (messages 270 # containing only ASCII codepoints in the unicode input). 271 # If it does happen, turn the string into bytes in a way 272 # guaranteed not to fail. 273 bpayload = payload.encode('raw-unicode-escape') 274 if not decode: 275 return payload 276 if cte == 'quoted-printable': 277 return utils._qdecode(bpayload) 278 elif cte == 'base64': 279 # XXX: this is a bit of a hack; decode_b should probably be factored 280 # out somewhere, but I haven't figured out where yet. 281 value, defects = decode_b(b''.join(bpayload.splitlines())) 282 for defect in defects: 283 self.policy.handle_defect(self, defect) 284 return value 285 elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): 286 in_file = BytesIO(bpayload) 287 out_file = BytesIO() 288 try: 289 uu.decode(in_file, out_file, quiet=True) 290 return out_file.getvalue() 291 except uu.Error: 292 # Some decoding problem 293 return bpayload 294 if isinstance(payload, str): 295 return bpayload 296 return payload 297 298 def set_payload(self, payload, charset=None): 299 """Set the payload to the given value. 300 301 Optional charset sets the message's default character set. See 302 set_charset() for details. 303 """ 304 if isinstance(payload, bytes): 305 payload = payload.decode('ascii', 'surrogateescape') 306 self._payload = payload 307 if charset is not None: 308 self.set_charset(charset) 309 310 def set_charset(self, charset): 311 """Set the charset of the payload to a given character set. 312 313 charset can be a Charset instance, a string naming a character set, or 314 None. If it is a string it will be converted to a Charset instance. 315 If charset is None, the charset parameter will be removed from the 316 Content-Type field. Anything else will generate a TypeError. 317 318 The message will be assumed to be of type text/* encoded with 319 charset.input_charset. It will be converted to charset.output_charset 320 and encoded properly, if needed, when generating the plain text 321 representation of the message. MIME headers (MIME-Version, 322 Content-Type, Content-Transfer-Encoding) will be added as needed. 323 """ 324 if charset is None: 325 self.del_param('charset') 326 self._charset = None 327 return 328 if not isinstance(charset, Charset): 329 charset = Charset(charset) 330 self._charset = charset 331 if 'MIME-Version' not in self: 332 self.add_header('MIME-Version', '1.0') 333 if 'Content-Type' not in self: 334 self.add_header('Content-Type', 'text/plain', 335 charset=charset.get_output_charset()) 336 else: 337 self.set_param('charset', charset.get_output_charset()) 338 if charset != charset.get_output_charset(): 339 self._payload = charset.body_encode(self._payload) 340 if 'Content-Transfer-Encoding' not in self: 341 cte = charset.get_body_encoding() 342 try: 343 cte(self) 344 except TypeError: 345 self._payload = charset.body_encode(self._payload) 346 self.add_header('Content-Transfer-Encoding', cte) 347 348 def get_charset(self): 349 """Return the Charset instance associated with the message's payload. 350 """ 351 return self._charset 352 353 # 354 # MAPPING INTERFACE (partial) 355 # 356 def __len__(self): 357 """Return the total number of headers, including duplicates.""" 358 return len(self._headers) 359 360 def __getitem__(self, name): 361 """Get a header value. 362 363 Return None if the header is missing instead of raising an exception. 364 365 Note that if the header appeared multiple times, exactly which 366 occurrence gets returned is undefined. Use get_all() to get all 367 the values matching a header field name. 368 """ 369 return self.get(name) 370 371 def __setitem__(self, name, val): 372 """Set the value of a header. 373 374 Note: this does not overwrite an existing header with the same field 375 name. Use __delitem__() first to delete any existing headers. 376 """ 377 max_count = self.policy.header_max_count(name) 378 if max_count: 379 lname = name.lower() 380 found = 0 381 for k, v in self._headers: 382 if k.lower() == lname: 383 found += 1 384 if found >= max_count: 385 raise ValueError("There may be at most {} {} headers " 386 "in a message".format(max_count, name)) 387 self._headers.append(self.policy.header_store_parse(name, val)) 388 389 def __delitem__(self, name): 390 """Delete all occurrences of a header, if present. 391 392 Does not raise an exception if the header is missing. 393 """ 394 name = name.lower() 395 newheaders = [] 396 for k, v in self._headers: 397 if k.lower() != name: 398 newheaders.append((k, v)) 399 self._headers = newheaders 400 401 def __contains__(self, name): 402 return name.lower() in [k.lower() for k, v in self._headers] 403 404 def __iter__(self): 405 for field, value in self._headers: 406 yield field 407 408 def keys(self): 409 """Return a list of all the message's header field names. 410 411 These will be sorted in the order they appeared in the original 412 message, or were added to the message, and may contain duplicates. 413 Any fields deleted and re-inserted are always appended to the header 414 list. 415 """ 416 return [k for k, v in self._headers] 417 418 def values(self): 419 """Return a list of all the message's header values. 420 421 These will be sorted in the order they appeared in the original 422 message, or were added to the message, and may contain duplicates. 423 Any fields deleted and re-inserted are always appended to the header 424 list. 425 """ 426 return [self.policy.header_fetch_parse(k, v) 427 for k, v in self._headers] 428 429 def items(self): 430 """Get all the message's header fields and values. 431 432 These will be sorted in the order they appeared in the original 433 message, or were added to the message, and may contain duplicates. 434 Any fields deleted and re-inserted are always appended to the header 435 list. 436 """ 437 return [(k, self.policy.header_fetch_parse(k, v)) 438 for k, v in self._headers] 439 440 def get(self, name, failobj=None): 441 """Get a header value. 442 443 Like __getitem__() but return failobj instead of None when the field 444 is missing. 445 """ 446 name = name.lower() 447 for k, v in self._headers: 448 if k.lower() == name: 449 return self.policy.header_fetch_parse(k, v) 450 return failobj 451 452 # 453 # "Internal" methods (public API, but only intended for use by a parser 454 # or generator, not normal application code. 455 # 456 457 def set_raw(self, name, value): 458 """Store name and value in the model without modification. 459 460 This is an "internal" API, intended only for use by a parser. 461 """ 462 self._headers.append((name, value)) 463 464 def raw_items(self): 465 """Return the (name, value) header pairs without modification. 466 467 This is an "internal" API, intended only for use by a generator. 468 """ 469 return iter(self._headers.copy()) 470 471 # 472 # Additional useful stuff 473 # 474 475 def get_all(self, name, failobj=None): 476 """Return a list of all the values for the named field. 477 478 These will be sorted in the order they appeared in the original 479 message, and may contain duplicates. Any fields deleted and 480 re-inserted are always appended to the header list. 481 482 If no such fields exist, failobj is returned (defaults to None). 483 """ 484 values = [] 485 name = name.lower() 486 for k, v in self._headers: 487 if k.lower() == name: 488 values.append(self.policy.header_fetch_parse(k, v)) 489 if not values: 490 return failobj 491 return values 492 493 def add_header(self, _name, _value, **_params): 494 """Extended header setting. 495 496 name is the header field to add. keyword arguments can be used to set 497 additional parameters for the header field, with underscores converted 498 to dashes. Normally the parameter will be added as key="value" unless 499 value is None, in which case only the key will be added. If a 500 parameter value contains non-ASCII characters it can be specified as a 501 three-tuple of (charset, language, value), in which case it will be 502 encoded according to RFC2231 rules. Otherwise it will be encoded using 503 the utf-8 charset and a language of ''. 504 505 Examples: 506 507 msg.add_header('content-disposition', 'attachment', filename='bud.gif') 508 msg.add_header('content-disposition', 'attachment', 509 filename=('utf-8', '', Fußballer.ppt')) 510 msg.add_header('content-disposition', 'attachment', 511 filename='Fußballer.ppt')) 512 """ 513 parts = [] 514 for k, v in _params.items(): 515 if v is None: 516 parts.append(k.replace('_', '-')) 517 else: 518 parts.append(_formatparam(k.replace('_', '-'), v)) 519 if _value is not None: 520 parts.insert(0, _value) 521 self[_name] = SEMISPACE.join(parts) 522 523 def replace_header(self, _name, _value): 524 """Replace a header. 525 526 Replace the first matching header found in the message, retaining 527 header order and case. If no matching header was found, a KeyError is 528 raised. 529 """ 530 _name = _name.lower() 531 for i, (k, v) in zip(range(len(self._headers)), self._headers): 532 if k.lower() == _name: 533 self._headers[i] = self.policy.header_store_parse(k, _value) 534 break 535 else: 536 raise KeyError(_name) 537 538 # 539 # Use these three methods instead of the three above. 540 # 541 542 def get_content_type(self): 543 """Return the message's content type. 544 545 The returned string is coerced to lower case of the form 546 `maintype/subtype'. If there was no Content-Type header in the 547 message, the default type as given by get_default_type() will be 548 returned. Since according to RFC 2045, messages always have a default 549 type this will always return a value. 550 551 RFC 2045 defines a message's default type to be text/plain unless it 552 appears inside a multipart/digest container, in which case it would be 553 message/rfc822. 554 """ 555 missing = object() 556 value = self.get('content-type', missing) 557 if value is missing: 558 # This should have no parameters 559 return self.get_default_type() 560 ctype = _splitparam(value)[0].lower() 561 # RFC 2045, section 5.2 says if its invalid, use text/plain 562 if ctype.count('/') != 1: 563 return 'text/plain' 564 return ctype 565 566 def get_content_maintype(self): 567 """Return the message's main content type. 568 569 This is the `maintype' part of the string returned by 570 get_content_type(). 571 """ 572 ctype = self.get_content_type() 573 return ctype.split('/')[0] 574 575 def get_content_subtype(self): 576 """Returns the message's sub-content type. 577 578 This is the `subtype' part of the string returned by 579 get_content_type(). 580 """ 581 ctype = self.get_content_type() 582 return ctype.split('/')[1] 583 584 def get_default_type(self): 585 """Return the `default' content type. 586 587 Most messages have a default content type of text/plain, except for 588 messages that are subparts of multipart/digest containers. Such 589 subparts have a default content type of message/rfc822. 590 """ 591 return self._default_type 592 593 def set_default_type(self, ctype): 594 """Set the `default' content type. 595 596 ctype should be either "text/plain" or "message/rfc822", although this 597 is not enforced. The default content type is not stored in the 598 Content-Type header. 599 """ 600 self._default_type = ctype 601 602 def _get_params_preserve(self, failobj, header): 603 # Like get_params() but preserves the quoting of values. BAW: 604 # should this be part of the public interface? 605 missing = object() 606 value = self.get(header, missing) 607 if value is missing: 608 return failobj 609 params = [] 610 for p in _parseparam(value): 611 try: 612 name, val = p.split('=', 1) 613 name = name.strip() 614 val = val.strip() 615 except ValueError: 616 # Must have been a bare attribute 617 name = p.strip() 618 val = '' 619 params.append((name, val)) 620 params = utils.decode_params(params) 621 return params 622 623 def get_params(self, failobj=None, header='content-type', unquote=True): 624 """Return the message's Content-Type parameters, as a list. 625 626 The elements of the returned list are 2-tuples of key/value pairs, as 627 split on the `=' sign. The left hand side of the `=' is the key, 628 while the right hand side is the value. If there is no `=' sign in 629 the parameter the value is the empty string. The value is as 630 described in the get_param() method. 631 632 Optional failobj is the object to return if there is no Content-Type 633 header. Optional header is the header to search instead of 634 Content-Type. If unquote is True, the value is unquoted. 635 """ 636 missing = object() 637 params = self._get_params_preserve(missing, header) 638 if params is missing: 639 return failobj 640 if unquote: 641 return [(k, _unquotevalue(v)) for k, v in params] 642 else: 643 return params 644 645 def get_param(self, param, failobj=None, header='content-type', 646 unquote=True): 647 """Return the parameter value if found in the Content-Type header. 648 649 Optional failobj is the object to return if there is no Content-Type 650 header, or the Content-Type header has no such parameter. Optional 651 header is the header to search instead of Content-Type. 652 653 Parameter keys are always compared case insensitively. The return 654 value can either be a string, or a 3-tuple if the parameter was RFC 655 2231 encoded. When it's a 3-tuple, the elements of the value are of 656 the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and 657 LANGUAGE can be None, in which case you should consider VALUE to be 658 encoded in the us-ascii charset. You can usually ignore LANGUAGE. 659 The parameter value (either the returned string, or the VALUE item in 660 the 3-tuple) is always unquoted, unless unquote is set to False. 661 662 If your application doesn't care whether the parameter was RFC 2231 663 encoded, it can turn the return value into a string as follows: 664 665 rawparam = msg.get_param('foo') 666 param = email.utils.collapse_rfc2231_value(rawparam) 667 668 """ 669 if header not in self: 670 return failobj 671 for k, v in self._get_params_preserve(failobj, header): 672 if k.lower() == param.lower(): 673 if unquote: 674 return _unquotevalue(v) 675 else: 676 return v 677 return failobj 678 679 def set_param(self, param, value, header='Content-Type', requote=True, 680 charset=None, language='', replace=False): 681 """Set a parameter in the Content-Type header. 682 683 If the parameter already exists in the header, its value will be 684 replaced with the new value. 685 686 If header is Content-Type and has not yet been defined for this 687 message, it will be set to "text/plain" and the new parameter and 688 value will be appended as per RFC 2045. 689 690 An alternate header can specified in the header argument, and all 691 parameters will be quoted as necessary unless requote is False. 692 693 If charset is specified, the parameter will be encoded according to RFC 694 2231. Optional language specifies the RFC 2231 language, defaulting 695 to the empty string. Both charset and language should be strings. 696 """ 697 if not isinstance(value, tuple) and charset: 698 value = (charset, language, value) 699 700 if header not in self and header.lower() == 'content-type': 701 ctype = 'text/plain' 702 else: 703 ctype = self.get(header) 704 if not self.get_param(param, header=header): 705 if not ctype: 706 ctype = _formatparam(param, value, requote) 707 else: 708 ctype = SEMISPACE.join( 709 [ctype, _formatparam(param, value, requote)]) 710 else: 711 ctype = '' 712 for old_param, old_value in self.get_params(header=header, 713 unquote=requote): 714 append_param = '' 715 if old_param.lower() == param.lower(): 716 append_param = _formatparam(param, value, requote) 717 else: 718 append_param = _formatparam(old_param, old_value, requote) 719 if not ctype: 720 ctype = append_param 721 else: 722 ctype = SEMISPACE.join([ctype, append_param]) 723 if ctype != self.get(header): 724 if replace: 725 self.replace_header(header, ctype) 726 else: 727 del self[header] 728 self[header] = ctype 729 730 def del_param(self, param, header='content-type', requote=True): 731 """Remove the given parameter completely from the Content-Type header. 732 733 The header will be re-written in place without the parameter or its 734 value. All values will be quoted as necessary unless requote is 735 False. Optional header specifies an alternative to the Content-Type 736 header. 737 """ 738 if header not in self: 739 return 740 new_ctype = '' 741 for p, v in self.get_params(header=header, unquote=requote): 742 if p.lower() != param.lower(): 743 if not new_ctype: 744 new_ctype = _formatparam(p, v, requote) 745 else: 746 new_ctype = SEMISPACE.join([new_ctype, 747 _formatparam(p, v, requote)]) 748 if new_ctype != self.get(header): 749 del self[header] 750 self[header] = new_ctype 751 752 def set_type(self, type, header='Content-Type', requote=True): 753 """Set the main type and subtype for the Content-Type header. 754 755 type must be a string in the form "maintype/subtype", otherwise a 756 ValueError is raised. 757 758 This method replaces the Content-Type header, keeping all the 759 parameters in place. If requote is False, this leaves the existing 760 header's quoting as is. Otherwise, the parameters will be quoted (the 761 default). 762 763 An alternative header can be specified in the header argument. When 764 the Content-Type header is set, we'll always also add a MIME-Version 765 header. 766 """ 767 # BAW: should we be strict? 768 if not type.count('/') == 1: 769 raise ValueError 770 # Set the Content-Type, you get a MIME-Version 771 if header.lower() == 'content-type': 772 del self['mime-version'] 773 self['MIME-Version'] = '1.0' 774 if header not in self: 775 self[header] = type 776 return 777 params = self.get_params(header=header, unquote=requote) 778 del self[header] 779 self[header] = type 780 # Skip the first param; it's the old type. 781 for p, v in params[1:]: 782 self.set_param(p, v, header, requote) 783 784 def get_filename(self, failobj=None): 785 """Return the filename associated with the payload if present. 786 787 The filename is extracted from the Content-Disposition header's 788 `filename' parameter, and it is unquoted. If that header is missing 789 the `filename' parameter, this method falls back to looking for the 790 `name' parameter. 791 """ 792 missing = object() 793 filename = self.get_param('filename', missing, 'content-disposition') 794 if filename is missing: 795 filename = self.get_param('name', missing, 'content-type') 796 if filename is missing: 797 return failobj 798 return utils.collapse_rfc2231_value(filename).strip() 799 800 def get_boundary(self, failobj=None): 801 """Return the boundary associated with the payload if present. 802 803 The boundary is extracted from the Content-Type header's `boundary' 804 parameter, and it is unquoted. 805 """ 806 missing = object() 807 boundary = self.get_param('boundary', missing) 808 if boundary is missing: 809 return failobj 810 # RFC 2046 says that boundaries may begin but not end in w/s 811 return utils.collapse_rfc2231_value(boundary).rstrip() 812 813 def set_boundary(self, boundary): 814 """Set the boundary parameter in Content-Type to 'boundary'. 815 816 This is subtly different than deleting the Content-Type header and 817 adding a new one with a new boundary parameter via add_header(). The 818 main difference is that using the set_boundary() method preserves the 819 order of the Content-Type header in the original message. 820 821 HeaderParseError is raised if the message has no Content-Type header. 822 """ 823 missing = object() 824 params = self._get_params_preserve(missing, 'content-type') 825 if params is missing: 826 # There was no Content-Type header, and we don't know what type 827 # to set it to, so raise an exception. 828 raise errors.HeaderParseError('No Content-Type header found') 829 newparams = [] 830 foundp = False 831 for pk, pv in params: 832 if pk.lower() == 'boundary': 833 newparams.append(('boundary', '"%s"' % boundary)) 834 foundp = True 835 else: 836 newparams.append((pk, pv)) 837 if not foundp: 838 # The original Content-Type header had no boundary attribute. 839 # Tack one on the end. BAW: should we raise an exception 840 # instead??? 841 newparams.append(('boundary', '"%s"' % boundary)) 842 # Replace the existing Content-Type header with the new value 843 newheaders = [] 844 for h, v in self._headers: 845 if h.lower() == 'content-type': 846 parts = [] 847 for k, v in newparams: 848 if v == '': 849 parts.append(k) 850 else: 851 parts.append('%s=%s' % (k, v)) 852 val = SEMISPACE.join(parts) 853 newheaders.append(self.policy.header_store_parse(h, val)) 854 855 else: 856 newheaders.append((h, v)) 857 self._headers = newheaders 858 859 def get_content_charset(self, failobj=None): 860 """Return the charset parameter of the Content-Type header. 861 862 The returned string is always coerced to lower case. If there is no 863 Content-Type header, or if that header has no charset parameter, 864 failobj is returned. 865 """ 866 missing = object() 867 charset = self.get_param('charset', missing) 868 if charset is missing: 869 return failobj 870 if isinstance(charset, tuple): 871 # RFC 2231 encoded, so decode it, and it better end up as ascii. 872 pcharset = charset[0] or 'us-ascii' 873 try: 874 # LookupError will be raised if the charset isn't known to 875 # Python. UnicodeError will be raised if the encoded text 876 # contains a character not in the charset. 877 as_bytes = charset[2].encode('raw-unicode-escape') 878 charset = str(as_bytes, pcharset) 879 except (LookupError, UnicodeError): 880 charset = charset[2] 881 # charset characters must be in us-ascii range 882 try: 883 charset.encode('us-ascii') 884 except UnicodeError: 885 return failobj 886 # RFC 2046, $4.1.2 says charsets are not case sensitive 887 return charset.lower() 888 889 def get_charsets(self, failobj=None): 890 """Return a list containing the charset(s) used in this message. 891 892 The returned list of items describes the Content-Type headers' 893 charset parameter for this message and all the subparts in its 894 payload. 895 896 Each item will either be a string (the value of the charset parameter 897 in the Content-Type header of that part) or the value of the 898 'failobj' parameter (defaults to None), if the part does not have a 899 main MIME type of "text", or the charset is not defined. 900 901 The list will contain one string for each part of the message, plus 902 one for the container message (i.e. self), so that a non-multipart 903 message will still return a list of length 1. 904 """ 905 return [part.get_content_charset(failobj) for part in self.walk()] 906 907 # I.e. def walk(self): ... 908 from email.iterators import walk 909 910 911 class MIMEPart(Message): 912 913 def __init__(self, policy=None): 914 if policy is None: 915 from email.policy import default 916 policy = default 917 Message.__init__(self, policy) 918 919 @property 920 def is_attachment(self): 921 c_d = self.get('content-disposition') 922 if c_d is None: 923 return False 924 return c_d.lower() == 'attachment' 925 926 def _find_body(self, part, preferencelist): 927 if part.is_attachment: 928 return 929 maintype, subtype = part.get_content_type().split('/') 930 if maintype == 'text': 931 if subtype in preferencelist: 932 yield (preferencelist.index(subtype), part) 933 return 934 if maintype != 'multipart': 935 return 936 if subtype != 'related': 937 for subpart in part.iter_parts(): 938 yield from self._find_body(subpart, preferencelist) 939 return 940 if 'related' in preferencelist: 941 yield (preferencelist.index('related'), part) 942 candidate = None 943 start = part.get_param('start') 944 if start: 945 for subpart in part.iter_parts(): 946 if subpart['content-id'] == start: 947 candidate = subpart 948 break 949 if candidate is None: 950 subparts = part.get_payload() 951 candidate = subparts[0] if subparts else None 952 if candidate is not None: 953 yield from self._find_body(candidate, preferencelist) 954 955 def get_body(self, preferencelist=('related', 'html', 'plain')): 956 """Return best candidate mime part for display as 'body' of message. 957 958 Do a depth first search, starting with self, looking for the first part 959 matching each of the items in preferencelist, and return the part 960 corresponding to the first item that has a match, or None if no items 961 have a match. If 'related' is not included in preferencelist, consider 962 the root part of any multipart/related encountered as a candidate 963 match. Ignore parts with 'Content-Disposition: attachment'. 964 """ 965 best_prio = len(preferencelist) 966 body = None 967 for prio, part in self._find_body(self, preferencelist): 968 if prio < best_prio: 969 best_prio = prio 970 body = part 971 if prio == 0: 972 break 973 return body 974 975 _body_types = {('text', 'plain'), 976 ('text', 'html'), 977 ('multipart', 'related'), 978 ('multipart', 'alternative')} 979 def iter_attachments(self): 980 """Return an iterator over the non-main parts of a multipart. 981 982 Skip the first of each occurrence of text/plain, text/html, 983 multipart/related, or multipart/alternative in the multipart (unless 984 they have a 'Content-Disposition: attachment' header) and include all 985 remaining subparts in the returned iterator. When applied to a 986 multipart/related, return all parts except the root part. Return an 987 empty iterator when applied to a multipart/alternative or a 988 non-multipart. 989 """ 990 maintype, subtype = self.get_content_type().split('/') 991 if maintype != 'multipart' or subtype == 'alternative': 992 return 993 parts = self.get_payload() 994 if maintype == 'multipart' and subtype == 'related': 995 # For related, we treat everything but the root as an attachment. 996 # The root may be indicated by 'start'; if there's no start or we 997 # can't find the named start, treat the first subpart as the root. 998 start = self.get_param('start') 999 if start: 1000 found = False 1001 attachments = [] 1002 for part in parts: 1003 if part.get('content-id') == start: 1004 found = True 1005 else: 1006 attachments.append(part) 1007 if found: 1008 yield from attachments 1009 return 1010 parts.pop(0) 1011 yield from parts 1012 return 1013 # Otherwise we more or less invert the remaining logic in get_body. 1014 # This only really works in edge cases (ex: non-text relateds or 1015 # alternatives) if the sending agent sets content-disposition. 1016 seen = [] # Only skip the first example of each candidate type. 1017 for part in parts: 1018 maintype, subtype = part.get_content_type().split('/') 1019 if ((maintype, subtype) in self._body_types and 1020 not part.is_attachment and subtype not in seen): 1021 seen.append(subtype) 1022 continue 1023 yield part 1024 1025 def iter_parts(self): 1026 """Return an iterator over all immediate subparts of a multipart. 1027 1028 Return an empty iterator for a non-multipart. 1029 """ 1030 if self.get_content_maintype() == 'multipart': 1031 yield from self.get_payload() 1032 1033 def get_content(self, *args, content_manager=None, **kw): 1034 if content_manager is None: 1035 content_manager = self.policy.content_manager 1036 return content_manager.get_content(self, *args, **kw) 1037 1038 def set_content(self, *args, content_manager=None, **kw): 1039 if content_manager is None: 1040 content_manager = self.policy.content_manager 1041 content_manager.set_content(self, *args, **kw) 1042 1043 def _make_multipart(self, subtype, disallowed_subtypes, boundary): 1044 if self.get_content_maintype() == 'multipart': 1045 existing_subtype = self.get_content_subtype() 1046 disallowed_subtypes = disallowed_subtypes + (subtype,) 1047 if existing_subtype in disallowed_subtypes: 1048 raise ValueError("Cannot convert {} to {}".format( 1049 existing_subtype, subtype)) 1050 keep_headers = [] 1051 part_headers = [] 1052 for name, value in self._headers: 1053 if name.lower().startswith('content-'): 1054 part_headers.append((name, value)) 1055 else: 1056 keep_headers.append((name, value)) 1057 if part_headers: 1058 # There is existing content, move it to the first subpart. 1059 part = type(self)(policy=self.policy) 1060 part._headers = part_headers 1061 part._payload = self._payload 1062 self._payload = [part] 1063 else: 1064 self._payload = [] 1065 self._headers = keep_headers 1066 self['Content-Type'] = 'multipart/' + subtype 1067 if boundary is not None: 1068 self.set_param('boundary', boundary) 1069 1070 def make_related(self, boundary=None): 1071 self._make_multipart('related', ('alternative', 'mixed'), boundary) 1072 1073 def make_alternative(self, boundary=None): 1074 self._make_multipart('alternative', ('mixed',), boundary) 1075 1076 def make_mixed(self, boundary=None): 1077 self._make_multipart('mixed', (), boundary) 1078 1079 def _add_multipart(self, _subtype, *args, _disp=None, **kw): 1080 if (self.get_content_maintype() != 'multipart' or 1081 self.get_content_subtype() != _subtype): 1082 getattr(self, 'make_' + _subtype)() 1083 part = type(self)(policy=self.policy) 1084 part.set_content(*args, **kw) 1085 if _disp and 'content-disposition' not in part: 1086 part['Content-Disposition'] = _disp 1087 self.attach(part) 1088 1089 def add_related(self, *args, **kw): 1090 self._add_multipart('related', *args, _disp='inline', **kw) 1091 1092 def add_alternative(self, *args, **kw): 1093 self._add_multipart('alternative', *args, **kw) 1094 1095 def add_attachment(self, *args, **kw): 1096 self._add_multipart('mixed', *args, _disp='attachment', **kw) 1097 1098 def clear(self): 1099 self._headers = [] 1100 self._payload = None 1101 1102 def clear_content(self): 1103 self._headers = [(n, v) for n, v in self._headers 1104 if not n.lower().startswith('content-')] 1105 self._payload = None 1106 1107 1108 class EmailMessage(MIMEPart): 1109 1110 def set_content(self, *args, **kw): 1111 super().set_content(*args, **kw) 1112 if 'MIME-Version' not in self: 1113 self['MIME-Version'] = '1.0' |