The crawler can't run, wrong type for undefined entity
up vote
0
down vote
favorite
import requests
import xml.etree.ElementTree as ET
from xml.parsers.expat import ParserCreate
class DefaultSaxHandler(object):
def __init__(self, provinces):
self.provinces = provinces
def start_element(self, name, attrs):
pass
def end_element(self, name):
pass
def char_data(self, text):
if text!='':
self.provinces.append(text)
def get_province_entry(url):
content = requests.get(url).content.decode('gb2312')
start = content.find('<table height="22" cellSpacing="0" cellPadding="0" width="710" border="0">')
end = content.find('<hr size="1" width="520">')
content = content[start:end].strip()
provinces=
handler = DefaultSaxHandler(provinces)
parser = ParserCreate()
parser.StartElementHandler = handler.start_element
parser.EndElementHandler = handler.end_element
parser.CharacterDataHandler = handler.char_data
parser.Parse(content)
return provinces
provinces = get_province_entry('http://www.ip138.com/post/')
print(provinces)
I am a novice, this is my imitate others wrote a web crawler, but it can't run, always make a mistake, I thought for a long time also didn't know where is wrong
Runtime error display are as follows:
ExpatError
Traceback (most recent call last)
<ipython-input-3-2a36bf818109> in <module>()
39 return provinces
40
---> 41 provinces = get_province_entry('http://www.ip138.com/post/')
42 print(provinces)
<ipython-input-3-2a36bf818109> in get_province_entry(url)
35 parser.EndElementHandler = handler.end_element
36 parser.CharacterDataHandler = handler.char_data
---> 37 parser.Parse(content)
38
39 return provinces
ExpatError: undefined entity: line 6, column 55
I don't know where I went wrong, because this is the first I wrote to imitate the crawler
python-3.x web-crawler
add a comment |
up vote
0
down vote
favorite
import requests
import xml.etree.ElementTree as ET
from xml.parsers.expat import ParserCreate
class DefaultSaxHandler(object):
def __init__(self, provinces):
self.provinces = provinces
def start_element(self, name, attrs):
pass
def end_element(self, name):
pass
def char_data(self, text):
if text!='':
self.provinces.append(text)
def get_province_entry(url):
content = requests.get(url).content.decode('gb2312')
start = content.find('<table height="22" cellSpacing="0" cellPadding="0" width="710" border="0">')
end = content.find('<hr size="1" width="520">')
content = content[start:end].strip()
provinces=
handler = DefaultSaxHandler(provinces)
parser = ParserCreate()
parser.StartElementHandler = handler.start_element
parser.EndElementHandler = handler.end_element
parser.CharacterDataHandler = handler.char_data
parser.Parse(content)
return provinces
provinces = get_province_entry('http://www.ip138.com/post/')
print(provinces)
I am a novice, this is my imitate others wrote a web crawler, but it can't run, always make a mistake, I thought for a long time also didn't know where is wrong
Runtime error display are as follows:
ExpatError
Traceback (most recent call last)
<ipython-input-3-2a36bf818109> in <module>()
39 return provinces
40
---> 41 provinces = get_province_entry('http://www.ip138.com/post/')
42 print(provinces)
<ipython-input-3-2a36bf818109> in get_province_entry(url)
35 parser.EndElementHandler = handler.end_element
36 parser.CharacterDataHandler = handler.char_data
---> 37 parser.Parse(content)
38
39 return provinces
ExpatError: undefined entity: line 6, column 55
I don't know where I went wrong, because this is the first I wrote to imitate the crawler
python-3.x web-crawler
My goal is to crawl a web page text value part of the area
– snow snow
Nov 12 at 11:01
add a comment |
up vote
0
down vote
favorite
up vote
0
down vote
favorite
import requests
import xml.etree.ElementTree as ET
from xml.parsers.expat import ParserCreate
class DefaultSaxHandler(object):
def __init__(self, provinces):
self.provinces = provinces
def start_element(self, name, attrs):
pass
def end_element(self, name):
pass
def char_data(self, text):
if text!='':
self.provinces.append(text)
def get_province_entry(url):
content = requests.get(url).content.decode('gb2312')
start = content.find('<table height="22" cellSpacing="0" cellPadding="0" width="710" border="0">')
end = content.find('<hr size="1" width="520">')
content = content[start:end].strip()
provinces=
handler = DefaultSaxHandler(provinces)
parser = ParserCreate()
parser.StartElementHandler = handler.start_element
parser.EndElementHandler = handler.end_element
parser.CharacterDataHandler = handler.char_data
parser.Parse(content)
return provinces
provinces = get_province_entry('http://www.ip138.com/post/')
print(provinces)
I am a novice, this is my imitate others wrote a web crawler, but it can't run, always make a mistake, I thought for a long time also didn't know where is wrong
Runtime error display are as follows:
ExpatError
Traceback (most recent call last)
<ipython-input-3-2a36bf818109> in <module>()
39 return provinces
40
---> 41 provinces = get_province_entry('http://www.ip138.com/post/')
42 print(provinces)
<ipython-input-3-2a36bf818109> in get_province_entry(url)
35 parser.EndElementHandler = handler.end_element
36 parser.CharacterDataHandler = handler.char_data
---> 37 parser.Parse(content)
38
39 return provinces
ExpatError: undefined entity: line 6, column 55
I don't know where I went wrong, because this is the first I wrote to imitate the crawler
python-3.x web-crawler
import requests
import xml.etree.ElementTree as ET
from xml.parsers.expat import ParserCreate
class DefaultSaxHandler(object):
def __init__(self, provinces):
self.provinces = provinces
def start_element(self, name, attrs):
pass
def end_element(self, name):
pass
def char_data(self, text):
if text!='':
self.provinces.append(text)
def get_province_entry(url):
content = requests.get(url).content.decode('gb2312')
start = content.find('<table height="22" cellSpacing="0" cellPadding="0" width="710" border="0">')
end = content.find('<hr size="1" width="520">')
content = content[start:end].strip()
provinces=
handler = DefaultSaxHandler(provinces)
parser = ParserCreate()
parser.StartElementHandler = handler.start_element
parser.EndElementHandler = handler.end_element
parser.CharacterDataHandler = handler.char_data
parser.Parse(content)
return provinces
provinces = get_province_entry('http://www.ip138.com/post/')
print(provinces)
I am a novice, this is my imitate others wrote a web crawler, but it can't run, always make a mistake, I thought for a long time also didn't know where is wrong
Runtime error display are as follows:
ExpatError
Traceback (most recent call last)
<ipython-input-3-2a36bf818109> in <module>()
39 return provinces
40
---> 41 provinces = get_province_entry('http://www.ip138.com/post/')
42 print(provinces)
<ipython-input-3-2a36bf818109> in get_province_entry(url)
35 parser.EndElementHandler = handler.end_element
36 parser.CharacterDataHandler = handler.char_data
---> 37 parser.Parse(content)
38
39 return provinces
ExpatError: undefined entity: line 6, column 55
I don't know where I went wrong, because this is the first I wrote to imitate the crawler
python-3.x web-crawler
python-3.x web-crawler
asked Nov 12 at 10:52
snow snow
11
11
My goal is to crawl a web page text value part of the area
– snow snow
Nov 12 at 11:01
add a comment |
My goal is to crawl a web page text value part of the area
– snow snow
Nov 12 at 11:01
My goal is to crawl a web page text value part of the area
– snow snow
Nov 12 at 11:01
My goal is to crawl a web page text value part of the area
– snow snow
Nov 12 at 11:01
add a comment |
active
oldest
votes
Your Answer
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53260605%2fthe-crawler-cant-run-wrong-type-for-undefined-entity%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
active
oldest
votes
active
oldest
votes
active
oldest
votes
active
oldest
votes
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53260605%2fthe-crawler-cant-run-wrong-type-for-undefined-entity%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
My goal is to crawl a web page text value part of the area
– snow snow
Nov 12 at 11:01