Python script for uploading RSS feeds

Attention. To work with the script, you need knowledge of Python.

The Python script allows you to upload an RSS feed to Yandex.Webmaster. The script sequentially sends requests to the Yandex.Turbo pages API and reports the result of the RSS feed upload.

To work with the script, you only need to specify the site address, your OAuth token and the RSS feed content. Other data (user-id, host-id and so on) is obtained by the script automatically.

Setting up compression
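To send the RSS feed in compressed form, specify the Content-Encoding: gzip header in the upload_rss function. The header only declares the encoding: the RSS data passed to the function must itself be gzip-compressed.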
...
def upload_rss(upload_path, rss_data):
    headers = {
        'Content-Type': 'application/rss+xml',
        'Content-Encoding': 'gzip'
    }
...

Setting up the upload mode

The upload mode is set in the get_rss_upload_path function when declaring the path variable. The PRODUCTION mode is set by default.
...
def get_rss_upload_path(user_id, host_id):
    path = '/user/{user_id}/hosts/{host_id}/turbo/uploadAddress/?mode={mode}'.format(
        user_id=user_id, host_id=host_id, mode='PRODUCTION')
...
To debug Turbo pages, set the DEBUG mode.
...
def get_rss_upload_path(user_id, host_id):
    path = '/user/{user_id}/hosts/{host_id}/turbo/uploadAddress/?mode={mode}'.format(
        user_id=user_id, host_id=host_id, mode='DEBUG')
...

Using the script

To load the RSS feed, add your own data to the script:
  • The URL of the site the RSS feed is loaded for.
  • OAuth token. For more information about getting a token, see Authorization.
  • RSS feed content.

    For a test run you can use the sample RSS feed.
    Example
    <?xml version = "1.0" encoding = "UTF-8"?>
    <rss version="2.0" xmlns:yandex="http://news.yandex.ru" xmlns:turbo="http://turbo.yandex.ru">
      <channel>
        <item turbo="true">
          <title>Page title</title>
          <link>https://example.com</link>
          <turbo:content>
            <![CDATA[
              <header>
                <h1>The Healthy Breakfast restaurant</h1>
                <h2>Healthy and delicious</h2>
                <figure>
                  <img src="https://avatars.mds.yandex.net/get-sbs-sd/403988/e6f459c3-8ada-44bf-a6c9-dbceb60f3757/orig">
                </figure>
                <menu>
                  <a href="https://example.com/page1.html">Menu item 1</a>
                  <a href="https://example.com/page2.html">Menu item 2</a>
                </menu>
              </header>
              <p>What's a good way to start the day? With a delicious and healthy breakfast!</p>
              <p>Join us for breakfast. Check out photos of our dishes <a href="#">on our website</a>.</p>
              <h2>Menu</h2>
              <figure>
                <img src="https://avatars.mds.yandex.net/get-sbs-sd/369181/49e3683c-ef58-4067-91f9-786222aa0e65/orig">
                <figcaption>Omelette with herbs</figcaption>
              </figure>
              <p>Our menu always contains fresh, delicious and healthy options.</p>
              <p>Find out for yourself.</p>
              <button formaction="tel:+7(123)456-78-90"
                data-background-color="#5B97B0"
                data-color="white"
                data-primary="true">Reserve a table</button>
              <div data-block="widget-feedback" data-stick="false">
                <div data-block="chat" data-type="whatsapp" data-url="https://whatsapp.com"></div>
                <div data-block="chat" data-type="telegram" data-url="http://telegram.com/"></div>
                <div data-block="chat" data-type="vkontakte" data-url="https://vk.com/"></div>
                <div data-block="chat" data-type="facebook" data-url="https://facebook.com"></div>
                <div data-block="chat" data-type="viber" data-url="https://viber.com"></div>
              </div>
              <p>Our address: <a href="#">Nullam dolor massa, porta a nulla in, ultricies vehicula arcu.</a></p>
              <p>Photos — http://unsplash.com</p>
            ]]>
          </turbo:content>
        </item>
      </channel>
    </rss>
    Copied to clipboard
import json
import pprint
import time

try:
    from urllib.parse import urlparse  # Python 3
except ImportError:
    from urlparse import urlparse  # Python 2

import requests
from requests import HTTPError

# --- Values to fill in before running the script ---------------------------
# Site URL including the protocol (http or https).
HOST_ADDRESS = 'Your site URL. For example, https://example.com'
# OAuth token sent in the Authorization header of every request.
OAUTH_TOKEN = 'Your OAuth token'
# Content of the RSS feed that is uploaded.
RSS_STRING = 'RSS feed content'

AUTH_HEADER = {
    'Authorization': 'OAuth %s' % OAUTH_TOKEN
}

# One shared session, so the Authorization header is attached to every
# request made through SESSION.
SESSION = requests.Session()
SESSION.headers.update(AUTH_HEADER)

# Base URL of the API: <API_BASE_URL>/<API_VERSION>.
API_VERSION = 'v4'
API_BASE_URL = 'https://api.webmaster.yandex.net'
API_URL = API_BASE_URL + '/' + API_VERSION


def validate_api_response(response, required_key_name=None):
    """Check an API response and return its decoded JSON body.

    Args:
        response: Response object of an API call. Its ``headers``, ``text``,
            ``status_code`` and ``raise_for_status()`` are used.
        required_key_name: Optional key that must be present in the JSON
            body of a 200 response.

    Returns:
        The decoded JSON body, or None when the response does not declare an
        ``application/json`` content type.

    Raises:
        HTTPError: If a 200 response does not contain ``required_key_name``,
            or if a non-200 response carries an ``error_message``. For other
            non-200 responses ``response.raise_for_status()`` decides
            whether to raise.
    """
    # A response without a Content-Type header is treated as non-JSON
    # instead of failing with KeyError.
    content_type = response.headers.get('Content-Type') or ''
    content = json.loads(response.text) if 'application/json' in content_type else None

    # Key lookups only make sense on a JSON object; a missing or non-object
    # body is reported as "key missing" rather than crashing with TypeError.
    is_json_object = isinstance(content, dict)

    if response.status_code == 200:
        if required_key_name and not (is_json_object and required_key_name in content):
            raise HTTPError('Unexpected API response. Missing required key: %s' % required_key_name, response=response)
    elif is_json_object and 'error_message' in content:
        raise HTTPError('Error API response. Error message: %s' % content['error_message'], response=response)
    else:
        response.raise_for_status()

    return content


def url_to_host_id(url):
    """Convert a site URL into a host identifier ``scheme:hostname:port``.

    The host name is IDNA-encoded, stripped of trailing dots and lowercased.
    When the URL has no explicit port, 80 is used for http and 443 for https.

    Args:
        url: Site URL including the protocol, for example
            ``https://example.com``. Byte strings are decoded as UTF-8 and
            surrounding whitespace is ignored.

    Returns:
        The host identifier string, for example ``https:example.com:443``.

    Raises:
        ValueError: If the URL has no protocol, a protocol other than http
            or https, or no host name. Errors from decoding or IDNA-encoding
            the host name are raised as UnicodeError, a ValueError subclass.
    """
    # Byte strings (plain ``str`` on Python 2) are decoded first so that
    # non-ASCII host names can be IDNA-encoded below.
    if isinstance(url, bytes):
        url = url.decode('utf-8')

    parsed_url = urlparse(url.strip())

    scheme = parsed_url.scheme
    if not scheme:
        raise ValueError('No protocol (https or http) in url')

    if scheme not in ('http', 'https'):
        raise ValueError('Illegal protocol: %s' % scheme)

    port = parsed_url.port
    if not port:
        port = 80 if scheme == 'http' else 443

    hostname = parsed_url.hostname
    if not hostname:
        # urlparse yields None for URLs such as 'http://'.
        raise ValueError('No hostname in url')

    # The idna codec returns bytes on Python 3 and a native str on Python 2;
    # normalize to the native string type before further string handling.
    ascii_hostname = hostname.encode('idna')
    if not isinstance(ascii_hostname, str):
        ascii_hostname = ascii_hostname.decode('ascii')
    ascii_hostname = ascii_hostname.rstrip('.').lower()

    return str(scheme) + ':' + ascii_hostname + ':' + str(port)


def get_user_id():
    """Request ``/user/`` from the API and return the ``user_id`` field."""
    payload = validate_api_response(SESSION.get(API_URL + '/user/'), 'user_id')
    return payload['user_id']


def get_user_host_ids(user_id):
    """Return the ``host_id`` of every entry in the user's ``hosts`` list.

    Args:
        user_id: User identifier substituted into the request path.

    Returns:
        A list with one ``host_id`` value per entry of the ``hosts`` field
        of the API response, in response order.
    """
    hosts_url = API_URL + '/user/{user_id}/hosts'.format(user_id=user_id)
    payload = validate_api_response(SESSION.get(hosts_url), 'hosts')

    collected = []
    for entry in payload['hosts']:
        collected.append(entry['host_id'])
    return collected


def is_user_host_id(user_id, host_id):
    """Return True if ``host_id`` is among the host ids listed for the user."""
    return host_id in get_user_host_ids(user_id)


def get_rss_upload_path(user_id, host_id, mode='PRODUCTION'):
    """Request the address the RSS feed has to be uploaded to.

    Args:
        user_id: User identifier substituted into the request path.
        host_id: Host identifier substituted into the request path.
        mode: Upload mode passed as the ``mode`` query parameter. Defaults
            to ``'PRODUCTION'``; pass ``'DEBUG'`` to debug Turbo pages.

    Returns:
        The ``upload_address`` value of the API response.
    """
    path = '/user/{user_id}/hosts/{host_id}/turbo/uploadAddress/?mode={mode}'.format(
        user_id=user_id, host_id=host_id, mode=mode)

    r = SESSION.get(API_URL + path)
    c = validate_api_response(r, 'upload_address')

    return c['upload_address']


def upload_rss(upload_path, rss_data):
    """Upload the RSS feed and return the identifier of the created task.

    Args:
        upload_path: URL the feed is posted to.
        rss_data: RSS feed content. Text is sent UTF-8 encoded; pass bytes
            to send a feed in any other encoding unchanged.

    Returns:
        The ``task_id`` value of the API response.
    """
    headers = {
        'Content-Type': 'application/rss+xml'
    }

    # Encode text explicitly so the bytes on the wire do not depend on the
    # HTTP library's default text encoding, which may not be able to
    # represent non-Latin feed content. ``type(u'')`` is the text type on
    # both Python 2 and Python 3; bytes and other body types pass through.
    if isinstance(rss_data, type(u'')):
        rss_data = rss_data.encode('utf-8')

    r = SESSION.post(url=upload_path, data=rss_data, headers=headers)
    c = validate_api_response(r, 'task_id')

    return c['task_id']


def get_task_info(user_id, host_id, task_id):
    """Fetch the information the API reports for an upload task.

    Args:
        user_id: User identifier substituted into the request path.
        host_id: Host identifier substituted into the request path.
        task_id: Task identifier substituted into the request path.

    Returns:
        The result of ``validate_api_response`` for the task request.
    """
    task_url = API_URL + '/user/{user_id}/hosts/{host_id}/turbo/tasks/{task_id}'.format(
        user_id=user_id, host_id=host_id, task_id=task_id)

    return validate_api_response(SESSION.get(task_url))


def retry_call_until(func, predicate, max_tries=5, initial_delay=60, backoff=2):
    """Call ``func`` repeatedly until ``predicate`` accepts its result.

    ``func`` is called once and then retried up to ``max_tries`` more times.
    Before each retry the function sleeps; the delay starts at
    ``initial_delay`` seconds and is multiplied by ``backoff`` after every
    sleep.

    Args:
        func: Callable without arguments whose result is checked.
        predicate: Callable taking the result of ``func``; a truthy return
            value stops the retries.
        max_tries: Maximum number of retries after the first call.
        initial_delay: Delay in seconds before the first retry.
        backoff: Factor the delay is multiplied by after each sleep.

    Returns:
        The result of the last ``func`` call, whether or not ``predicate``
        accepted it, or None if ``func`` was never called.
    """
    current_delay = initial_delay

    ret_val = None
    for n_try in range(max_tries + 1):
        ret_val = func()
        if predicate(ret_val):
            break

        # No attempts are left: return right away instead of announcing a
        # retry and sleeping for the longest delay with nothing to follow.
        if n_try == max_tries:
            break

        print('Will retry. Sleeping for %ds' % current_delay)
        time.sleep(current_delay)
        current_delay *= backoff

    return ret_val


# Resolve the identifiers, request the upload address and upload the feed.
user_id = get_user_id()
host_id = url_to_host_id(HOST_ADDRESS)
upload_path = get_rss_upload_path(user_id, host_id)
task_id = upload_rss(upload_path, RSS_STRING)

# Poll the task until its status is no longer PROCESSING or the retries
# run out.
print('Waiting for the upload task to complete. This will take a while...')
task_info = retry_call_until(
    func=lambda: get_task_info(user_id, host_id, task_id),
    predicate=lambda info: info['load_status'] != 'PROCESSING')

# Fetch the task once more and report the status and the details from the
# same response, so that the two outputs cannot disagree.
task_info = get_task_info(user_id, host_id, task_id)
print('Task status: %s' % task_info['load_status'])
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(task_info)
Copied to clipboard