# Fetch the page and parse the response body into an lxml HTML tree.
res = requests.get(url)
# NOTE: res.text is an already-decoded str. lxml rejects str input that carries
# an encoding declaration with ValueError ("Please use bytes input ..."), so
# passing the raw bytes (res.content) is the way to avoid that error.
html = etree.HTML(res.text)
# Select the nodes of interest with an XPath query.
contents = html.xpath('//div/xxxx')
然後遇到了如下的錯誤信息:
Traceback (most recent call last):
  File "xxxxxxxx.py", line 157, in <module>
    get_website_title_content(url)
  File "xxxxxxxx.py", line 141, in get_website_title_content
    html = etree.HTML(html_text)
  File "src\lxml\etree.pyx", line 3170, in lxml.etree.HTML
  File "src\lxml\parser.pxi", line 1872, in lxml.etree._parseMemoryDocument
ValueError: Unicode strings with encoding declaration are not supported. Please use bytes input or XML fragments without declaration.
關鍵錯誤就是 ValueError: Unicode strings with encoding declaration are not supported. Please use bytes input or XML fragments without declaration.
@property
def content(self):
    """Content of the response, in bytes.

    The body is read once via ``iter_content`` and cached on
    ``self._content``; later accesses return the cached value.

    Returns:
        The response body as bytes (``b''`` when the stream is empty), or
        ``None`` when ``status_code`` is 0 or there is no raw stream.

    Raises:
        RuntimeError: If nothing is cached yet and the content was already
            consumed.
    """
    # ``False`` is the "not read yet" sentinel; ``None`` is a valid cached value.
    if self._content is False:
        # Read the contents.
        if self._content_consumed:
            raise RuntimeError('The content for this response was already consumed')

        if self.status_code == 0 or self.raw is None:
            self._content = None
        else:
            self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''

    self._content_consumed = True
    # don't need to release the connection; that's been handled by urllib3
    # since we exhausted the data.
    return self._content


@property
def text(self):
    """Content of the response, in unicode.

    If Response.encoding is None, encoding will be guessed using
    ``chardet``.

    The encoding of the response content is determined based solely on HTTP
    headers, following RFC 2616 to the letter. If you can take advantage of
    non-HTTP knowledge to make a better guess at the encoding, you should
    set ``r.encoding`` appropriately before accessing this property.

    Returns:
        The decoded body as ``str``; an empty string when there is no
        content. Undecodable bytes are replaced rather than raising.
    """
    # Try charset from content-type
    content = None
    encoding = self.encoding

    if not self.content:
        return str('')

    # Fallback to auto-detected encoding.
    if self.encoding is None:
        encoding = self.apparent_encoding

    # Decode unicode from given encoding.
    try:
        content = str(self.content, encoding, errors='replace')
    except (LookupError, TypeError):
        # A LookupError is raised if the encoding was not found which could
        # indicate a misspelling or similar mistake.
        #
        # A TypeError can be raised if encoding is None
        #
        # So we try blindly encoding.
        content = str(self.content, errors='replace')

    return content