当前位置: 首页 > news >正文

广告优化师前景手机seo排名软件

广告优化师前景,手机seo排名软件,做电话销售需要的网站,论学院网站建设项目的进度管理内容仅供学习参考,如有侵权联系删除 先通过京东非自营的店铺名拿到的公司名,再通过公司名称去其他平台拿到联系方式(代码省略) from aioscrapy.spiders import Spider from aioscrapy.http import Request, FormRequest import dd…

内容仅供学习参考,如有侵权联系删除

先通过京东非自营的店铺名拿到公司名,再通过公司名称去其他平台拿到联系方式(代码省略)


from aioscrapy.spiders import Spider
from aioscrapy.http import Request, FormRequest
import ddddocr
import re
import random

from loguru import logger

# Menu entries containing any of these words are dropped by get_category().
_MENU_NOISE_WORDS = ('首页', '全部', '所有', '问题', '指导', '售后', '撰文')


def _with_scheme(url):
    """Return *url* with an ``https:`` prefix unless it already has a scheme.

    Empty/None input yields ``''``. URLs that already start with ``http://``
    or ``https://`` are returned unchanged so they are not double-prefixed;
    everything else gets ``https:`` prepended, as the original code did.
    """
    if not url:
        return ''
    if url.startswith(('http://', 'https://')):
        return url
    return 'https:' + url


class JingDongSpider(Spider):
    """Spider that walks JD shop pages and emits shop and product records.

    Flow: shop home page -> captcha image -> licence page (company name)
    -> optionally the shop's product listing page.
    """

    name = 'products:jd'
    custom_settings = {
        'CONCURRENT_REQUESTS': 4,
        'DOWNLOAD_TIMEOUT': 10,
        'RETRY_TIMES': 5,
        'HTTPERROR_ALLOWED_CODES': [503],
        'COOKIES_ENABLED': False,
        # Duplicate-request filter class.
        'DUPEFILTER_CLASS': 'aioscrapy.dupefilters.redis.RFPDupeFilter',
    }

    # Shared OCR engine used to read the captcha image.
    ocr = ddddocr.DdddOcr(show_ad=False, use_gpu=True)

    # Shop ids to crawl; override on a subclass/instance to crawl other shops.
    shop_ids = ('11111111',)

    # Home pages whose body is shorter than this are skipped by parse().
    # NOTE(review): presumably such pages are placeholders — confirm.
    MIN_PAGE_LENGTH = 25000

    async def start_requests(self):
        """Yield one shop home-page request per id in ``shop_ids``.

        The id travels in ``meta['shop_id']`` because ``parse`` reads it
        from there.
        """
        for shop_id in self.shop_ids:
            yield Request(
                url=f"https://mall.jd.com/index-{shop_id}.html?from=pc",
                method='GET',
                dont_filter=False,
                meta={"shop_id": str(shop_id)},
                priority=500,
            )

    async def parse(self, response):
        """Handle a shop home page.

        Skips pages that report "not found" or are shorter than
        ``MIN_PAGE_LENGTH``; otherwise collects the products visible on the
        page and requests a captcha image, passing the collected data along
        in ``meta['data']``.
        """
        title = response.xpath('//title/text()').get() or ''

        # Prefer the id passed by start_requests; fall back to the id in the
        # URL so a request built without meta does not raise KeyError.
        shop_id = response.meta.get('shop_id')
        if shop_id is None:
            found = re.search(r"index-(\d+)\.html", response.url)
            shop_id = found.group(1) if found else ''
        shop_id = str(shop_id)

        if '您所访问的页面不存在' in str(title) or len(response.text) < self.MIN_PAGE_LENGTH:
            logger.info(f"{shop_id}")
            return
        logger.info(title.strip())

        categorys = self.get_category(response)
        # Pass the categories now so home-page products carry the same
        # "mainProducts" value that parse_product() supplies.
        product_list = self.get_product_items(response, categorys=categorys)
        urls = re.findall(
            r"//\w+\.jd\.com/view_search-\d+-\d+-\d+-\d+-\d+-\d+\.html",
            response.text,
        )
        yield Request(
            url=f"https://mall.jd.com/sys/vc/createVerifyCode.html?random={random.random()}",
            method='GET',
            callback=self.parse_img_code,
            dont_filter=True,
            meta={
                "data": {
                    "product_url": 'https:' + urls[0] if urls else '',
                    "categorys": categorys,
                    "product_list": product_list,
                    "shop_id": shop_id,
                }
            },
            priority=500,
        )

    async def parse_img_code(self, response):
        """Read the captcha image and post the recognised code to the licence page.

        Yields nothing when either the OCR result or the response cookies
        are empty.
        """
        code = self.ocr.classification(response.body)
        cookie = dict(response.cookies.items())
        shop_id = response.meta["data"]["shop_id"]
        if not code or not cookie:
            return
        yield FormRequest(
            url=f'https://mall.jd.com/showLicence-{shop_id}.html',
            method='POST',
            formdata={"verifyCode": str(code)},
            cookies=cookie,
            meta={"data": response.meta["data"]},
            callback=self.parse_shop_detail,
            dont_filter=True,
            priority=400,
        )

    async def parse_shop_detail(self, response):
        """Parse the licence page and emit the shop record.

        Pages without a company name, or whose shop/company name contains
        '测试', are logged and skipped. When the home page already yielded
        products they are emitted here; otherwise the product listing page
        is requested if its URL is known.
        """
        company = response.xpath(
            '//*[contains(.,"企业名称:")]/following-sibling::span[position()=1]/text()'
        ).get() or ''
        shop_name = response.xpath(
            '//*[contains(.,"店铺名称:")]/following-sibling::span[position()=1]//text()'
        ).get() or ''
        shop_url = response.xpath(
            '//*[contains(.,"店铺网址:")]/following-sibling::span[position()=1]//text()'
        ).get()
        license_img = response.xpath('//img[@class="qualification-img"]/@src').get() or ''

        if not company:
            logger.info(f"无公司: {response.url}")
            return
        if '测试' in shop_name or '测试' in company:
            logger.info(f" {shop_name} => {company}")
            return
        logger.info(company)

        data = response.meta['data']
        data['company'] = company
        data['shop_name'] = shop_name
        items = dict(
            company=company,
            shop_name=shop_name,
            shop_url=_with_scheme(shop_url) if shop_url else response.url,
            product_url=data['product_url'],
            shop_id=data['shop_id'],
            push_kafka_status=0,
            license=_with_scheme(license_img),
        )

        products = data['product_list']
        if products:
            # Company and shop name were unknown when these were scraped
            # from the home page, so fill them in now.
            for product in products:
                product['entityId'] = company
                product['productShopName'] = shop_name
            yield dict(
                source='jd.com',
                ocid='',
                entityId=company,
                product=products,
            )
            items['push_kafka_status'] = 1
            yield items
            return

        if data['product_url']:
            yield Request(
                url=data['product_url'],
                method='GET',
                meta={"data": data},
                callback=self.parse_product,
                dont_filter=True,
                priority=300,
            )
        else:
            logger.warning(f"获取不到产品链接:{response.url}")
        # NOTE(review): the source had lost its line breaks, so the nesting of
        # the next two statements is reconstructed. They are placed here so the
        # shop record is emitted whether or not a product-page request was
        # scheduled — confirm against the original.
        items.pop('product_url')
        yield items

    async def parse_product(self, response):
        """Parse the product listing page requested by parse_shop_detail.

        On success emits the product batch followed by a small status record
        with ``push_kafka_status=1``; logs an error when no product is found.
        """
        data = response.meta['data']
        company = data['company']
        product_list = self.get_product_items(
            response,
            data['shop_name'],
            company,
            data['categorys'],
            data['product_url'],
        )
        if not product_list:
            logger.error(f"{response.url} => {data['shop_id']}")
            return
        yield dict(
            source='jd.com',
            ocid='',
            entityId=company,
            product=product_list,
        )
        logger.info(f"成功: {company} => {data['shop_id']}")
        yield dict(
            company=company,
            shop_id=data['shop_id'],
            push_kafka_status=1,
        )

    def get_product_items(self, response, shop_name='', company='', categorys='', shop_url='') -> list:
        """Build product dicts from at most the first ten product nodes.

        Args:
            response: page to scan for product nodes.
            shop_name: stored as ``productShopName``.
            company: stored as ``entityId``.
            categorys: stored as ``mainProducts``.
            shop_url: stored as ``dataLink``; ``response.url`` is used when empty.

        Returns:
            A list of product dicts; nodes with neither title nor image are
            skipped.
        """
        nodes = response.xpath(
            '//li[@class="jSubObject"] | //li[@class="jSubObject gl-item"] | //div[@class="jItem"]'
        )
        product_list = []
        for node in nodes[:10]:
            title = node.xpath('.//div[@class="jDesc"]/a/@title').get() or ''
            img = str(node.xpath('.//div[@class="jPic"]//img/@src').get() or '').replace('s350x350', '')
            if not title and not img:
                continue
            if img:
                # Rewrite the /n2/../n9/ path segment to /n1/, then make sure
                # the URL has a scheme without double-prefixing absolute URLs.
                img = _with_scheme(re.sub(r"/n[23456789]/", "/n1/", img))
            product_list.append({
                "entityId": company,
                "productPic": img,
                "productName": title,  # product name
                "productCategory": "",  # product category
                "productKeyword": "",  # product keywords
                "productPrice": "",  # product price
                "mainProducts": categorys,  # main products
                "listingPlatform": "京东",
                "productShopName": shop_name,  # shop the product belongs to
                "dataLink": shop_url or response.url,  # shop link
            })
        return product_list

    @staticmethod
    def get_category(response) -> str:
        """Return the shop's menu entries joined by commas.

        Entries containing any word in ``_MENU_NOISE_WORDS`` are left out.
        """
        names = response.xpath(
            '//ul[@class="menu-list"]/li[@class="menu"]/a/text() | //div[@class="abs"]//div[@class="ins abs hdur_2"]/a/text()'
        ).getall()
        return ','.join(
            name for name in names
            if not any(word in name for word in _MENU_NOISE_WORDS)
        )


if __name__ == '__main__':
    JingDongSpider.start()

最后的数据

在这里插入图片描述

本内容仅限用于学习参考,不得用于商业目的。如有版权问题,请联系我们删除,谢谢!
欢迎一起学习讨论Q540513871

http://www.khdw.cn/news/15418.html

相关文章:

  • 先做网站后备案吗邢台市seo服务
  • 免费word模板下载哪个网站口碑营销属于什么营销
  • 做设计英文网站今日十大头条新闻
  • 做汽车的网站如何自己创建网站
  • 企业做网站分一般为哪几种类型温州seo品牌优化软件
  • wordpress加载条重庆高端seo
  • 做网站为什么需要营业执照专业的郑州网站推广
  • 怎么添加网站图标刷评论网站推广
  • php做的网站论文网站开发报价方案
  • 做个人网站到哪里做在哪里可以免费自学seo课程
  • wordpress 摄影订单关键词优化策略有哪些
  • 局域网WEB网站开发郑州做网站推广电话
  • 沈阳德泰诺网站制作网站运营管理
  • 怎样做网站导航栏百度经验官网首页
  • 制作网站的免费软件大型门户网站建设
  • 软件ui设计网站西安网络推广seo0515
  • 做网站经验徐州seo外包公司
  • 新网站如何做免费推广电脑网络优化软件
  • 昆明集团网站建设优秀网站设计
  • 天津企业网站建设开发维护关键词首页排名优化公司推荐
  • 大淘客网站上的推广怎么做市场营销推广策划
  • 南平网站怎么做seo在线刷关键词网站排名
  • 专业网页制作产品网络推广网站seo服务公司
  • 西宁网站制作哪家公司好原创文章代写
  • 建设九九网站深圳seo优化电话
  • 软件开发和前端开发的区别江西网络推广seo
  • 安徽平台网站建设设计营销的手段和方法
  • wordpress轮播图设置百度快照优化排名
  • dedecms网站的源码如何安装seo优化的搜索排名影响因素主要有
  • 网站百度流量怎么做网站做优化一开始怎么做