extractdata.py
| 1 | from lxml import html |
| 2 | import requests |
| 3 | import json |
| 4 | |
# Values stored in the "system" field of each generated entry.  They are
# written verbatim to the output file, so they must not be renumbered.
# NOTE(review): the meaning of each number is defined by whatever reads
# data.json; the names below only mirror how this script uses them.
SYSTEM_TP = 0          # entries scraped from the Translation Project index
SYSTEM_TRANSIFEX = 1   # entries identified by a Transifex organisation
SYSTEM_REPO = 2        # entries described by a repository URL and a branch
SYSTEM_WEBLATE = 4     # entries listed by a Weblate instance

# Seconds to wait for any single HTTP request before giving up, so that an
# unresponsive server cannot hang the script forever.
REQUEST_TIMEOUT = 30

TP_INDEX_URL = "https://translationproject.org/domain/index.html"

WEBLATE_SERVERS = ['https://hosted.weblate.org',
                   'https://translate.fedoraproject.org']

# Hand-maintained list of Transifex projects, keyed by organisation.
TRANSIFEX_PROJECTS = {
    'openstreetmap': ['id-editor', 'osmybiz', 'openinghoursfragment',
                      'presets', 'vespucci'],
}

# Single hard-coded entry that is always appended to the result.
OFFLATE_ENTRY = {"name": "offlate", "system": SYSTEM_REPO,
                 "repo": "https://framagit.org/tyreunom/offlate",
                 "branch": "master"}

OUTPUT_PATH = 'offlate/data.json'


def fetch_tp_projects(index_url=TP_INDEX_URL):
    """Return one entry per project linked from the TP domain index page.

    The names are the link texts found in the first column of the tables
    on the page.  If the page does not answer with HTTP 200 the source is
    skipped (an empty list is returned) and a warning is printed, so the
    remaining sources are still processed.

    Raises:
        requests.RequestException: on connection errors or timeouts.
    """
    response = requests.get(index_url, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        print("warning: {} answered HTTP {}, skipping".format(
            index_url, response.status_code))
        return []
    tree = html.fromstring(response.content)
    names = tree.xpath('//table/tr/td[1]/a/text()')
    return [{"name": name, "system": SYSTEM_TP} for name in names]


def weblate_entries(results, server):
    """Convert the 'results' list of one Weblate API page into entries.

    Each item must provide the keys 'name' and 'slug'; *server* is stored
    as the entry's 'instance'.
    """
    return [{'name': item['name'], 'system': SYSTEM_WEBLATE,
             'instance': server, 'project': item['slug']}
            for item in results]


def fetch_weblate_projects(server):
    """Return entries for every project listed by the Weblate *server*.

    Starts at ``<server>/api/projects/`` and follows the 'next' URL of each
    response until it is null.

    Raises:
        requests.HTTPError: if a page answers with an error status.  Without
            this check an error body would fail later with a misleading
            KeyError on the missing pagination keys.
        requests.RequestException: on connection errors or timeouts.
    """
    entries = []
    url = server + '/api/projects/'
    while url is not None:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        page = response.json()
        entries.extend(weblate_entries(page['results'], server))
        # A missing 'next' key is treated like the last page.
        url = page.get('next')
    return entries


def transifex_entries(projects_by_organisation):
    """Return one entry per project of a ``{organisation: [names]}`` mapping."""
    entries = []
    for organisation, projects in projects_by_organisation.items():
        entries.extend({'name': project, 'system': SYSTEM_TRANSIFEX,
                        'organisation': organisation}
                       for project in projects)
    return entries


def collect_projects():
    """Gather the entries of all sources, in the order they are written out."""
    data = []

    print("Analysing projects at the TP...")
    data.extend(fetch_tp_projects())

    print("Analysing projects weblate instances...")
    for server in WEBLATE_SERVERS:
        print("server: {}".format(server))
        data.extend(fetch_weblate_projects(server))

    print("Analysing projects at transifex")
    data.extend(transifex_entries(TRANSIFEX_PROJECTS))

    # Copy so that callers mutating the result cannot alter the constant.
    data.append(dict(OFFLATE_ENTRY))
    return data


def main():
    """Collect all project entries and write them as JSON to OUTPUT_PATH."""
    data = collect_projects()
    print("done, writing results")
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
        json.dump(data, f)


# Only hit the network and write the file when run as a script, not when
# the module is merely imported.
if __name__ == "__main__":
    main()
| 44 |