Python script for uploading RSS feeds
Note: To work with the script, you need to know Python.
The Python script allows you to upload an RSS feed to Yandex.Webmaster. The script sends a sequence of requests to the Yandex.Turbo pages API and reports the result of the RSS feed upload.
To work with the script, you only need to specify the site address, your OAuth token, and the RSS feed content. Other data (user-id, host-id and so on) is obtained by the script automatically.
Setting up compression
To send the RSS feed in compressed form, specify the Content-Encoding: gzip header in the upload_rss function. The request body itself must then be gzip-compressed as well.
...
def upload_rss(upload_path, rss_data):
headers = {
'Content-Type': 'application/rss+xml',
'Content-Encoding': 'gzip'
}
...
Setting up the upload mode
The upload mode is set in the get_rss_upload_path function when declaring the path variable. The PRODUCTION mode is set by default.
...
def get_rss_upload_path(user_id, host_id):
path = '/user/{user_id}/hosts/{host_id}/turbo/uploadAddress/?mode={mode}'.format(
user_id=user_id, host_id=host_id, mode='PRODUCTION')
...
To debug Turbo pages, set the DEBUG mode.
...
def get_rss_upload_path(user_id, host_id):
path = '/user/{user_id}/hosts/{host_id}/turbo/uploadAddress/?mode={mode}'.format(
user_id=user_id, host_id=host_id, mode='DEBUG')
...
Using the script
To load the RSS feed, add your own data to the script:
- The URL of the site the RSS feed is loaded for.
- OAuth token. For more information about getting a token, see Authorization.
- RSS feed content.
For a test run, you can use the sample RSS feed.
Example:
<?xml version = "1.0" encoding = "UTF-8"?> <rss version="2.0" xmlns:yandex="http://news.yandex.ru" xmlns:turbo="http://turbo.yandex.ru"> <channel> <item turbo="true"> <title>Page title</title> <link>https://example.com</link> <turbo:content> <![CDATA[ <header> <h1>The Healthy Breakfast restaurant</h1> <h2>Healthy and delicious</h2> <figure> <img src="https://avatars.mds.yandex.net/get-sbs-sd/403988/e6f459c3-8ada-44bf-a6c9-dbceb60f3757/orig"> </figure> <menu> <a href="https://example.com/page1.html">Menu item 1</a> <a href="https://example.com/page2.html">Menu item 2</a> </menu> </header> <p>What's a good way to start the day? With a delicious and healthy breakfast!</p> <p>Join us for breakfast. Check out photos of our dishes <a href="#">on our website</a>.</p> <h2>Menu</h2> <figure> <img src="https://avatars.mds.yandex.net/get-sbs-sd/369181/49e3683c-ef58-4067-91f9-786222aa0e65/orig"> <figcaption>Omelette with herbs</figcaption> </figure> <p>Our menu always contains fresh, delicious and healthy options.</p> <p>Find out for yourself.</p> <button formaction="tel:+7(123)456-78-90" data-background-color="#5B97B0" data-color="white" data-primary="true">Reserve a table</button> <div data-block="widget-feedback" data-stick="false"> <div data-block="chat" data-type="whatsapp" data-url="https://whatsapp.com"></div> <div data-block="chat" data-type="telegram" data-url="http://telegram.com/"></div> <div data-block="chat" data-type="vkontakte" data-url="https://vk.com/"></div> <div data-block="chat" data-type="facebook" data-url="https://facebook.com"></div> <div data-block="chat" data-type="viber" data-url="https://viber.com"></div> </div> <p>Our address: <a href="#">Nullam dolor massa, porta a nulla in, ultricies vehicula arcu.</a></p> <p>Photos — http://unsplash.com</p> ]]> </turbo:content> </item> </channel> </rss>
Script for Python 2:
"""Upload an RSS feed to Yandex.Webmaster through the Turbo pages API.

Python 2 version of the script. Fill in HOST_ADDRESS, OAUTH_TOKEN and
RSS_STRING below; the user ID, the host ID and the upload address are
obtained by the script itself.
"""
import json
import pprint
import time
from urlparse import urlparse

import requests
from requests import HTTPError

# Data you have to provide before running the script.
HOST_ADDRESS = ' Your site URL. For example, https://example.com'
OAUTH_TOKEN = 'Your OAuth token'
RSS_STRING = 'RSS feed content'

# Every request sent through SESSION carries the OAuth token.
AUTH_HEADER = {
    'Authorization': 'OAuth %s' % OAUTH_TOKEN
}

SESSION = requests.Session()
SESSION.headers.update(AUTH_HEADER)

API_VERSION = 'v4'
API_BASE_URL = 'https://api.webmaster.yandex.net'
API_URL = API_BASE_URL + '/' + API_VERSION


def validate_api_response(response, required_key_name=None):
    """Check an API response and return its decoded JSON body.

    The body is decoded only when the Content-Type header mentions
    'application/json'; otherwise the returned content is None.

    Raises HTTPError when:
      * the status is 200 but `required_key_name` is absent from the body
        (including the case where there is no JSON body at all);
      * the status is not 200 and the body carries an 'error_message';
      * the status is not 200 and `raise_for_status()` considers it an error.
    """
    # A missing Content-Type header is treated as "not JSON" instead of
    # failing with a KeyError.
    content_type = response.headers.get('Content-Type', '')
    content = json.loads(response.text) if 'application/json' in content_type else None

    if response.status_code == 200:
        # `content` is None for non-JSON bodies; report that as a missing key
        # rather than crashing with a TypeError on `in None`.
        if required_key_name and (content is None or required_key_name not in content):
            raise HTTPError('Unexpected API response. Missing required key: %s' % required_key_name, response=response)
    elif content and 'error_message' in content:
        raise HTTPError('Error API response. Error message: %s' % content['error_message'], response=response)
    else:
        response.raise_for_status()

    return content


def url_to_host_id(url):
    """Build the host ID string 'scheme:hostname:port' from a site URL.

    The port defaults to 80 for http and 443 for https. The host name is
    IDNA-encoded, stripped of trailing dots and lower-cased.

    Raises ValueError when the URL has no scheme, a scheme other than
    http/https, or no host name.
    """
    parsed_url = urlparse(url)

    scheme = parsed_url.scheme
    if not scheme:
        raise ValueError('No protocol (https or http) in url')
    if scheme not in ('http', 'https'):
        raise ValueError('Illegal protocol: %s' % scheme)

    port = parsed_url.port
    if not port:
        port = 80 if scheme == 'http' else 443

    hostname = parsed_url.hostname
    if not hostname:
        # urlparse yields None here (e.g. 'https:///path'); fail with a clear
        # message instead of an AttributeError below.
        raise ValueError('No host name in url')
    hostname = hostname.encode('idna').rstrip('.').lower()

    return scheme + ':' + hostname + ':' + str(port)


def get_user_id():
    """Return the 'user_id' reported by the API for the current OAuth token."""
    r = SESSION.get(API_URL + '/user/')
    c = validate_api_response(r, 'user_id')
    return c['user_id']


def get_user_host_ids(user_id):
    """Return the list of 'host_id' values listed for `user_id`."""
    path = '/user/{user_id}/hosts'.format(user_id=user_id)
    r = SESSION.get(API_URL + path)
    c = validate_api_response(r, 'hosts')
    return [host_info['host_id'] for host_info in c['hosts']]


def is_user_host_id(user_id, host_id):
    """Return True when `host_id` is among the hosts listed for `user_id`."""
    return host_id in get_user_host_ids(user_id)


def get_rss_upload_path(user_id, host_id):
    """Request and return the 'upload_address' for the RSS feed.

    The upload mode is part of the request path; change mode='PRODUCTION'
    to mode='DEBUG' to debug Turbo pages.
    """
    path = '/user/{user_id}/hosts/{host_id}/turbo/uploadAddress/?mode={mode}'.format(
        user_id=user_id, host_id=host_id, mode='PRODUCTION')
    r = SESSION.get(API_URL + path)
    c = validate_api_response(r, 'upload_address')
    return c['upload_address']


def upload_rss(upload_path, rss_data):
    """POST the RSS feed to `upload_path` and return the resulting 'task_id'."""
    headers = {
        'Content-Type': 'application/rss+xml'
    }
    r = SESSION.post(url=upload_path, data=rss_data, headers=headers)
    c = validate_api_response(r, 'task_id')
    return c['task_id']


def get_task_info(user_id, host_id, task_id):
    """Return the decoded API response describing the upload task."""
    path = '/user/{user_id}/hosts/{host_id}/turbo/tasks/{task_id}'.format(
        user_id=user_id, host_id=host_id, task_id=task_id)
    r = SESSION.get(API_URL + path)
    return validate_api_response(r)


def retry_call_until(func, predicate, max_tries=5, initial_delay=60, backoff=2):
    """Call `func` repeatedly until `predicate(result)` is true.

    `func` is called at most `max_tries + 1` times. After every call whose
    result does not satisfy `predicate` (the last one included) the function
    sleeps; the delay starts at `initial_delay` seconds and is multiplied by
    `backoff` after each sleep. The result of the last call is returned.
    """
    current_delay = initial_delay
    ret_val = None
    for _ in xrange(0, max_tries + 1):
        ret_val = func()
        if predicate(ret_val):
            break

        # Single-argument parenthesised print: same output on Python 2.
        print('Will retry. Sleeping for %ds' % current_delay)
        time.sleep(current_delay)
        current_delay *= backoff

    return ret_val


user_id = get_user_id()
host_id = url_to_host_id(HOST_ADDRESS)
upload_path = get_rss_upload_path(user_id, host_id)
task_id = upload_rss(upload_path, RSS_STRING)

print('Waiting for the upload task to complete. This will take a while...')
task_info = retry_call_until(
    func=lambda: get_task_info(user_id, host_id, task_id),
    predicate=lambda task_info: task_info['load_status'] != 'PROCESSING')

print('Task status: %s' % task_info['load_status'])

# Fetch the task once more so the full, most recent report is printed.
task_info = get_task_info(user_id, host_id, task_id)
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(task_info)
Script for Python 3:
"""Upload an RSS feed to Yandex.Webmaster through the Turbo pages API.

Python 3 version of the script. Fill in HOST_ADDRESS, OAUTH_TOKEN and
RSS_STRING below; the user ID, the host ID and the upload address are
obtained by the script itself.
"""
import json
import pprint
import time
from urllib.parse import urlparse as parse

import requests
from requests import HTTPError

# Data you have to provide before running the script.
HOST_ADDRESS = ' Address of your site. For example, https://example.com'
OAUTH_TOKEN = 'Your OAuth token'
RSS_STRING = 'RSS feed content'

# Every request sent through SESSION carries the OAuth token.
AUTH_HEADER = {
    'Authorization': 'OAuth %s' % OAUTH_TOKEN
}

SESSION = requests.Session()
SESSION.headers.update(AUTH_HEADER)

API_VERSION = 'v4'
API_BASE_URL = 'https://api.webmaster.yandex.net'
API_URL = API_BASE_URL + '/' + API_VERSION


def validate_api_response(response, required_key_name=None):
    """Check an API response and return its decoded JSON body.

    The body is decoded only when the Content-Type header mentions
    'application/json'; otherwise the returned content is None.

    Raises HTTPError when:
      * the status is 200 but `required_key_name` is absent from the body
        (including the case where there is no JSON body at all);
      * the status is not 200 and the body carries an 'error_message';
      * the status is not 200 and `raise_for_status()` considers it an error.
    """
    # A missing Content-Type header is treated as "not JSON" instead of
    # failing with a KeyError.
    content_type = response.headers.get('Content-Type', '')
    content = json.loads(response.text) if 'application/json' in content_type else None

    if response.status_code == 200:
        # `content` is None for non-JSON bodies; report that as a missing key
        # rather than crashing with a TypeError on `in None`.
        if required_key_name and (content is None or required_key_name not in content):
            raise HTTPError('Unexpected API response. Missing required key: %s' % required_key_name, response=response)
    elif content and 'error_message' in content:
        raise HTTPError('Error API response. Error message: %s' % content['error_message'], response=response)
    else:
        response.raise_for_status()

    return content


def url_to_host_id(url):
    """Build the host ID string 'scheme:hostname:port' from a site URL.

    The port defaults to 80 for http and 443 for https. The host name is
    IDNA-encoded, stripped of trailing dots and lower-cased.

    Raises ValueError when the URL has no scheme, a scheme other than
    http/https, or no host name.
    """
    parsed_url = parse(url)

    scheme = parsed_url.scheme
    if not scheme:
        raise ValueError('No protocol (https or http) in url')
    if scheme not in ('http', 'https'):
        raise ValueError('Illegal protocol: %s' % scheme)

    port = parsed_url.port
    if not port:
        port = 80 if scheme == 'http' else 443

    hostname = parsed_url.hostname
    if not hostname:
        # urlparse yields None here (e.g. 'https:///path'); fail with a clear
        # message instead of a TypeError during concatenation.
        raise ValueError('No host name in url')
    # Encode to the ASCII (IDNA) form and decode back to str so it can be
    # concatenated with the other parts.
    hostname = hostname.encode('idna').decode('ascii').rstrip('.').lower()

    return scheme + ':' + hostname + ':' + str(port)


def get_user_id():
    """Return the 'user_id' reported by the API for the current OAuth token."""
    r = SESSION.get(API_URL + '/user/')
    c = validate_api_response(r, 'user_id')
    return c['user_id']


def get_user_host_ids(user_id):
    """Return the list of 'host_id' values listed for `user_id`."""
    path = '/user/{user_id}/hosts'.format(user_id=user_id)
    r = SESSION.get(API_URL + path)
    c = validate_api_response(r, 'hosts')
    return [host_info['host_id'] for host_info in c['hosts']]


def is_user_host_id(user_id, host_id):
    """Return True when `host_id` is among the hosts listed for `user_id`."""
    return host_id in get_user_host_ids(user_id)


def get_rss_upload_path(user_id, host_id):
    """Request and return the 'upload_address' for the RSS feed.

    The upload mode is part of the request path; change mode='DEBUG' to
    mode='PRODUCTION' for a production upload.
    """
    path = '/user/{user_id}/hosts/{host_id}/turbo/uploadAddress/?mode={mode}'.format(
        user_id=user_id, host_id=host_id, mode='DEBUG')
    r = SESSION.get(API_URL + path)
    c = validate_api_response(r, 'upload_address')
    return c['upload_address']


def upload_rss(upload_path, rss_data):
    """POST the RSS feed to `upload_path` and return the resulting 'task_id'.

    `rss_data` may be a str (sent UTF-8 encoded) or bytes (sent as is).
    Errors are reported on stdout rather than raised; in that case None is
    returned, so callers must check the result before using it.
    """
    headers = {
        'Content-Type': 'application/rss+xml'
    }
    # Only text needs encoding; bytes (e.g. an already compressed feed) are
    # passed through untouched.
    payload = rss_data.encode('utf-8') if isinstance(rss_data, str) else rss_data

    task_id = None
    try:
        r = SESSION.post(url=upload_path, data=payload, headers=headers)
        print(r)
        # validate_api_response already raises for error statuses, so no
        # separate raise_for_status() call is needed afterwards.
        c = validate_api_response(r, 'task_id')
        task_id = c['task_id']
    except HTTPError as http_err:
        print(f'HTTP error occurred: {http_err}')
    except Exception as err:  # deliberate best-effort: report and return None
        print(f'Other error occurred: {err}')
    else:
        print('Success!')

    return task_id


def get_task_info(user_id, host_id, task_id):
    """Return the decoded API response describing the upload task."""
    path = '/user/{user_id}/hosts/{host_id}/turbo/tasks/{task_id}'.format(
        user_id=user_id, host_id=host_id, task_id=task_id)
    r = SESSION.get(API_URL + path)
    return validate_api_response(r)


def retry_call_until(func, predicate, max_tries=5, initial_delay=60, backoff=2):
    """Call `func` repeatedly until `predicate(result)` is true.

    `func` is called at most `max_tries + 1` times. After every call whose
    result does not satisfy `predicate` (the last one included) the function
    sleeps; the delay starts at `initial_delay` seconds and is multiplied by
    `backoff` after each sleep. The result of the last call is returned.
    """
    current_delay = initial_delay
    ret_val = None
    for _ in range(0, max_tries + 1):
        ret_val = func()
        if predicate(ret_val):
            break

        # Use % so the delay is substituted into the message instead of being
        # printed as a separate argument.
        print('Will retry. Sleeping for %ds' % current_delay)
        time.sleep(current_delay)
        current_delay *= backoff

    return ret_val


def main():
    """Upload RSS_STRING for HOST_ADDRESS and print the upload task report."""
    user_id = get_user_id()
    host_id = url_to_host_id(HOST_ADDRESS)
    upload_path = get_rss_upload_path(user_id, host_id)
    task_id = upload_rss(upload_path, RSS_STRING)
    if task_id is None:
        # upload_rss has already printed the reason; polling a task that does
        # not exist would only produce a second, confusing error.
        print('Upload failed: no task ID was returned, nothing to wait for.')
        return

    print('Waiting for the upload task to complete. This will take a while...')
    task_info = retry_call_until(
        func=lambda: get_task_info(user_id, host_id, task_id),
        predicate=lambda task_info: task_info['load_status'] != 'PROCESSING')

    print('Task status: %s' % task_info['load_status'])

    # Fetch the task once more so the full, most recent report is printed.
    task_info = get_task_info(user_id, host_id, task_id)
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(task_info)


if __name__ == '__main__':
    main()
Copied to clipboard