"""Regenerate ``offlate/data.json``, a JSON list of translation projects.

Source file: offlate/extractdata.py

The script collects project entries from two places and writes them as a
single JSON array:

* the Translation Project domain index page, scraped over HTTP; each
  domain becomes ``{"name": <domain>, "system": 0}``;
* the hand-maintained ``transifexlist`` mapping below; each project
  becomes ``{"name": <project>, "system": 1, "organisation": <org>}``.

The output path is relative, so the script has to be run from the
directory that contains the ``offlate`` package.
"""
import json
import sys

from lxml import html
import requests

# Values stored under the "system" key of every entry.
TRANSLATION_PROJECT = 0
TRANSIFEX = 1

TP_INDEX_URL = "https://translationproject.org/domain/index.html"
# Without a timeout, requests waits forever on an unresponsive server.
REQUEST_TIMEOUT = 30  # seconds
# Relative path: resolved against the current working directory.
OUTPUT_PATH = 'offlate/data.json'

data = []

# Network errors (including timeouts) are left to propagate so that a
# failed run is visible instead of producing a truncated file.
tplist = requests.get(TP_INDEX_URL, timeout=REQUEST_TIMEOUT)
if tplist.status_code == 200:
    tree = html.fromstring(tplist.content)
    # Text of the link found in the first cell of each table row.
    domains = tree.xpath('//table/tr/td[1]/a/text()')
    data.extend(
        {"name": d, "system": TRANSLATION_PROJECT} for d in domains
    )
else:
    # Keep the best-effort behaviour (the file is still written with the
    # remaining entries) but do not hide the failure.
    print(
        "warning: could not fetch %s (HTTP %d); "
        "Translation Project domains are missing from the output"
        % (TP_INDEX_URL, tplist.status_code),
        file=sys.stderr,
    )

# Transifex projects, grouped by the organisation that owns them.
transifexlist = {
    'openstreetmap': ['id-editor', 'osmybiz', 'openinghoursfragment',
        'presets', 'vespucci'],
}
for organisation, projects in transifexlist.items():
    data.extend(
        {'name': proj, 'system': TRANSIFEX, 'organisation': organisation}
        for proj in projects
    )

# json.dump escapes non-ASCII characters by default, so the written bytes
# do not depend on the encoding; it is set explicitly to avoid relying on
# the platform default.
with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
    json.dump(data, f)