offlate/extractdata.py

extractdata.py

1
from lxml import html
2
import requests
3
import json
4
5
# Accumulates one dict per discovered project. Each section of this script
# adds entries carrying at least a "name" and a "system" key.
data = []

# Translation Project: fetch the domain index page and record the link text
# found in the first column of its table rows.
print("Analysing projects at the TP...")
# The timeout keeps the script from hanging forever on an unresponsive server.
tplist = requests.get("https://translationproject.org/domain/index.html",
                      timeout=30)
if tplist.status_code == 200:
    tree = html.fromstring(tplist.content)
    domains = tree.xpath('//table/tr/td[1]/a/text()')
    # NOTE(review): "system": 0 is presumably the code for the TP backend --
    # confirm against whatever consumes data.json.
    data.extend({"name": domain, "system": 0} for domain in domains)
else:
    # Stay best-effort (the remaining sources are still collected), but make
    # the failure visible instead of silently producing a file without any
    # TP project.
    print("warning: could not fetch the TP domain list (HTTP {})".format(
        tplist.status_code))
14
15
# Weblate: walk the paginated /api/projects/ listing of every known instance
# and record each project together with the instance it lives on.
print("Analysing projects weblate instances...")
weblateservers = ['https://hosted.weblate.org', 'https://translate.fedoraproject.org']
for server in weblateservers:
    print("server: {}".format(server))
    url = server + '/api/projects/'
    # Every page carries the URL of the following page under 'next'; the
    # loop stops once that value is None.
    while url is not None:
        # The timeout prevents a stalled server from blocking the whole run.
        lst = requests.get(url, timeout=30)
        # Fail with an explicit HTTP error rather than a confusing JSON
        # decoding error or KeyError when the server answers with an error
        # page (throttling, outage, ...).
        lst.raise_for_status()
        res = json.loads(lst.content)
        url = res['next']
        # NOTE(review): 'system': 4 is presumably the code for the Weblate
        # backend -- confirm against whatever consumes data.json.
        data.extend(
            {'name': r['name'], 'system': 4, 'instance': server,
             'project': r['slug']}
            for r in res['results'])
28
29
# Transifex: nothing is fetched here; the projects are listed by hand, keyed
# by the organisation they are filed under.
print("Analysing projects at transifex")
transifexlist = {
    'openstreetmap': [
        'id-editor',
        'osmybiz',
        'openinghoursfragment',
        'presets',
        'vespucci',
    ],
}
# One entry per (organisation, project) pair, in declaration order.
# NOTE(review): 'system': 1 is presumably the code for the Transifex backend
# -- confirm against whatever consumes data.json.
data.extend(
    {'name': project, 'system': 1, 'organisation': organisation}
    for organisation, projects in transifexlist.items()
    for project in projects
)
37
38
# Offlate itself is registered by hand, identified by a repository URL and a
# branch.
# NOTE(review): "system": 2 is presumably the code for a repository-based
# backend -- confirm against whatever consumes data.json.
data.append(dict(
    name="offlate",
    system=2,
    repo="https://framagit.org/tyreunom/offlate",
    branch="master",
))

print("done, writing results")
# The output path is relative to the current working directory.
with open('offlate/data.json', 'w') as out:
    serialised = json.dumps(data)
    out.write(serialised)
44