From 502d216724b259408097b1f1e87792aa3d502fad Mon Sep 17 00:00:00 2001 From: Tobias Bengfort Date: Mon, 23 Nov 2020 16:21:00 +0100 Subject: [PATCH 1/7] move attribute export logic to separate file --- castellum/recruitment/attribute_exporters.py | 75 +++++++++++++++++++ .../management/commands/attribute_export.py | 55 +++----------- 2 files changed, 86 insertions(+), 44 deletions(-) create mode 100644 castellum/recruitment/attribute_exporters.py diff --git a/castellum/recruitment/attribute_exporters.py b/castellum/recruitment/attribute_exporters.py new file mode 100644 index 000000000..c7ee1b6fa --- /dev/null +++ b/castellum/recruitment/attribute_exporters.py @@ -0,0 +1,75 @@ +# (c) 2018-2020 +# MPIB , +# MPI-CBS , +# MPIP +# +# This file is part of Castellum. +# +# Castellum is free software; you can redistribute it and/or modify it +# under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# Castellum is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public +# License along with Castellum. If not, see +# . + +import json + +from .attribute_fields import ANSWER_DECLINED + + +class JSONExporter: + TYPES = { + 'IntegerField': 'integer', + 'BooleanField': 'boolean', + } + FORMATS = { + 'DateField': 'date', + 'AgeField': 'date', + } + + def get_schema(self, descriptions): + schema = { + 'type': 'object', + 'properties': {}, + } + + for description in descriptions: + key = description.label + choices = list(description.attributechoice_set.all()) + + schema['properties'][key] = {k: v for k, v in [ + ('type', self.TYPES.get(description.field_type, 'string')), + ('format', self.FORMATS.get(description.field_type)), + ('enum', [c.label for c in choices]), + ('description', description.help_text), + ] if v} + + return json.dumps(schema, sort_keys=True, indent=4) + + def get_data(self, descriptions, subjects): + data = [] + + for subject in subjects: + data.append({}) + + for description in descriptions: + key = description.label + choices = list(description.attributechoice_set.all()) + + value = subject.attributes.get(description.json_key) + if value in [None, ANSWER_DECLINED]: + pass + elif choices: + choice = description.attributechoice_set.get(pk=value) + data[-1][key] = choice.label + else: + data[-1][key] = value + + return json.dumps(data, sort_keys=True, indent=4) diff --git a/castellum/recruitment/management/commands/attribute_export.py b/castellum/recruitment/management/commands/attribute_export.py index a8006928b..bca174bdd 100644 --- a/castellum/recruitment/management/commands/attribute_export.py +++ b/castellum/recruitment/management/commands/attribute_export.py @@ -20,59 +20,26 @@ # License along with Castellum. If not, see # . -import json from django.core.management.base import BaseCommand +from castellum.recruitment.attribute_exporters import JSONExporter from castellum.recruitment.models import AttributeDescription -from castellum.recruitment.models.attributes import ANSWER_DECLINED from castellum.subjects.models import Subject -TYPES = { - 'IntegerField': 'integer', - 'BooleanField': 'boolean', -} -FORMATS = { - 'DateField': 'date', - 'AgeField': 'date', -} - class Command(BaseCommand): - help = 'Export attributes for a single subject (along with the schema).' + help = 'Export attributes for a single subject or the attribute schema.' def add_arguments(self, parser): - parser.add_argument('subject_id', type=int) + parser.add_argument('subject_id', type=int, nargs='?') def handle(self, **options): - subject = Subject.objects.get(pk=options['subject_id']) - - output = { - 'data': {}, - 'schema': { - 'type': 'object', - 'properties': {}, - }, - } - - for description in AttributeDescription.objects.all(): - key = description.label - choices = list(description.attributechoice_set.all()) - - value = subject.attributes.get(description.json_key) - if value in [None, ANSWER_DECLINED]: - pass - elif choices: - choice = description.attributechoice_set.get(pk=value) - output['data'][key] = choice.label - else: - output['data'][key] = value - - output['schema']['properties'][key] = {k: v for k, v in [ - ('type', TYPES.get(description.field_type, 'string')), - ('format', FORMATS.get(description.field_type)), - ('enum', [c.label for c in choices]), - ('description', description.help_text), - ] if v} - - print(json.dumps(output, sort_keys=True, indent=4)) + exporter = JSONExporter() + descriptions = AttributeDescription.objects.all() + if options['subject_id'] is None: + s = exporter.get_schema(descriptions) + else: + subject = Subject.objects.get(pk=options['subject_id']) + s = exporter.get_data(descriptions, [subject]) + print(s) -- GitLab From 6b2293bd35c4341e3b5a715ea7c547f32afcf268 Mon Sep 17 00:00:00 2001 From: Tobias Bengfort Date: Wed, 25 Nov 2020 11:22:44 +0100 Subject: [PATCH 2/7] allow to configure attribute exporter --- castellum/recruitment/attribute_exporters.py | 8 ++++++++ .../recruitment/management/commands/attribute_export.py | 5 +++-- castellum/settings/default.py | 7 +++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/castellum/recruitment/attribute_exporters.py b/castellum/recruitment/attribute_exporters.py index c7ee1b6fa..efd7a20d1 100644 --- a/castellum/recruitment/attribute_exporters.py +++ b/castellum/recruitment/attribute_exporters.py @@ -21,9 +21,17 @@ import json +from django.conf import settings +from django.utils.module_loading import import_string + from .attribute_fields import ANSWER_DECLINED +def get_exporter(path=None): + cls = import_string(path or settings.CASTELLUM_ATTRIBUTE_EXPORTER) + return cls() + + class JSONExporter: TYPES = { 'IntegerField': 'integer', diff --git a/castellum/recruitment/management/commands/attribute_export.py b/castellum/recruitment/management/commands/attribute_export.py index bca174bdd..184e06750 100644 --- a/castellum/recruitment/management/commands/attribute_export.py +++ b/castellum/recruitment/management/commands/attribute_export.py @@ -23,7 +23,7 @@ from django.core.management.base import BaseCommand -from castellum.recruitment.attribute_exporters import JSONExporter +from castellum.recruitment.attribute_exporters import get_exporter from castellum.recruitment.models import AttributeDescription from castellum.subjects.models import Subject @@ -33,9 +33,10 @@ class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument('subject_id', type=int, nargs='?') + parser.add_argument('--exporter') def handle(self, **options): - exporter = JSONExporter() + exporter = get_exporter(options.get('exporter')) descriptions = AttributeDescription.objects.all() if options['subject_id'] is None: s = exporter.get_schema(descriptions) diff --git a/castellum/settings/default.py b/castellum/settings/default.py index fa48e24c3..715374211 100644 --- a/castellum/settings/default.py +++ b/castellum/settings/default.py @@ -417,5 +417,12 @@ CASTELLUM_FULL_AGE = 16 # Criteria that apply to all studies CASTELLUM_GENERAL_EXCLUSION_CRITERIA = '' +# Default exporter used for attributes. +# Currently available options are +# - 'castellum.recruitment.attribute_exporters.JSONExporter' +# You can also implement your own if required. +# See castellum/recruitment/attribute_exporters.py for details +CASTELLUM_ATTRIBUTE_EXPORTER = 'castellum.recruitment.attribute_exporters.JSONExporter' + SCHEDULER_URL = '' SCHEDULER_TOKEN = '' -- GitLab From 66adc31f562bef5824889f6de3ed3bb1cf6fa6fa Mon Sep 17 00:00:00 2001 From: Tobias Bengfort Date: Mon, 23 Nov 2020 16:26:34 +0100 Subject: [PATCH 3/7] Add ID field to export --- castellum/recruitment/attribute_exporters.py | 12 +++++++++--- .../management/commands/attribute_export.py | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/castellum/recruitment/attribute_exporters.py b/castellum/recruitment/attribute_exporters.py index efd7a20d1..f755eaeec 100644 --- a/castellum/recruitment/attribute_exporters.py +++ b/castellum/recruitment/attribute_exporters.py @@ -45,7 +45,11 @@ class JSONExporter: def get_schema(self, descriptions): schema = { 'type': 'object', - 'properties': {}, + 'properties': { + 'id': { + 'type': 'string', + }, + }, } for description in descriptions: @@ -64,8 +68,10 @@ class JSONExporter: def get_data(self, descriptions, subjects): data = [] - for subject in subjects: - data.append({}) + for _id, subject in subjects: + data.append({ + 'id': str(_id), + }) for description in descriptions: key = description.label diff --git a/castellum/recruitment/management/commands/attribute_export.py b/castellum/recruitment/management/commands/attribute_export.py index 184e06750..c37bd2431 100644 --- a/castellum/recruitment/management/commands/attribute_export.py +++ b/castellum/recruitment/management/commands/attribute_export.py @@ -42,5 +42,5 @@ class Command(BaseCommand): s = exporter.get_schema(descriptions) else: subject = Subject.objects.get(pk=options['subject_id']) - s = exporter.get_data(descriptions, [subject]) + s = exporter.get_data(descriptions, [(subject.pk, subject)]) print(s) -- GitLab From 36d4e584f5f892a7f33394baa458ca61176fa445 Mon Sep 17 00:00:00 2001 From: Tobias Bengfort Date: Mon, 23 Nov 2020 16:26:36 +0100 Subject: [PATCH 4/7] Use django json encoder See https://docs.djangoproject.com/en/2.2/topics/serialization/#django.core.serializers.json.DjangoJSONEncoder --- castellum/recruitment/attribute_exporters.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/castellum/recruitment/attribute_exporters.py b/castellum/recruitment/attribute_exporters.py index f755eaeec..5d9967a35 100644 --- a/castellum/recruitment/attribute_exporters.py +++ b/castellum/recruitment/attribute_exporters.py @@ -22,6 +22,7 @@ import json from django.conf import settings +from django.core.serializers.json import DjangoJSONEncoder from django.utils.module_loading import import_string from .attribute_fields import ANSWER_DECLINED @@ -42,6 +43,9 @@ class JSONExporter: 'AgeField': 'date', } + def _json_dumps(self, data): + return json.dumps(data, sort_keys=True, indent=4, cls=DjangoJSONEncoder) + def get_schema(self, descriptions): schema = { 'type': 'object', @@ -63,7 +67,7 @@ class JSONExporter: ('description', description.help_text), ] if v} - return json.dumps(schema, sort_keys=True, indent=4) + return self._json_dumps(schema) def get_data(self, descriptions, subjects): data = [] @@ -86,4 +90,4 @@ class JSONExporter: else: data[-1][key] = value - return json.dumps(data, sort_keys=True, indent=4) + return self._json_dumps(data) -- GitLab From e4385356f50c688bb6cba8300b0cb69e7be73a9f Mon Sep 17 00:00:00 2001 From: Tobias Bengfort Date: Mon, 23 Nov 2020 16:26:38 +0100 Subject: [PATCH 5/7] Extend exporter API to provide filenames --- castellum/recruitment/attribute_exporters.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/castellum/recruitment/attribute_exporters.py b/castellum/recruitment/attribute_exporters.py index 5d9967a35..c11e8bcb8 100644 --- a/castellum/recruitment/attribute_exporters.py +++ b/castellum/recruitment/attribute_exporters.py @@ -69,6 +69,9 @@ class JSONExporter: return self._json_dumps(schema) + def get_schema_filename(self): + return 'attributes.schema.json' + def get_data(self, descriptions, subjects): data = [] @@ -91,3 +94,6 @@ class JSONExporter: data[-1][key] = value return self._json_dumps(data) + + def get_data_filename(self): + return 'attributes.json' -- GitLab From 55e5e8b0397134c202df2f5313fae772ca522a37 Mon Sep 17 00:00:00 2001 From: Tobias Bengfort Date: Mon, 23 Nov 2020 16:26:40 +0100 Subject: [PATCH 6/7] add BIDSExporter --- castellum/recruitment/attribute_exporters.py | 53 ++++++++++++++++++++ castellum/settings/default.py | 1 + 2 files changed, 54 insertions(+) diff --git a/castellum/recruitment/attribute_exporters.py b/castellum/recruitment/attribute_exporters.py index c11e8bcb8..4ecc0576c 100644 --- a/castellum/recruitment/attribute_exporters.py +++ b/castellum/recruitment/attribute_exporters.py @@ -19,7 +19,9 @@ # License along with Castellum. If not, see # . +import csv import json +from io import StringIO from django.conf import settings from django.core.serializers.json import DjangoJSONEncoder @@ -97,3 +99,54 @@ class JSONExporter: def get_data_filename(self): return 'attributes.json' + + +class BIDSExporter: + # https://bids-specification.readthedocs.io/ + + def _json_dumps(self, data): + return json.dumps(data, sort_keys=True, indent=4, cls=DjangoJSONEncoder) + + def get_schema(self, descriptions): + schema = {} + + for description in descriptions: + key = description.label.lower().replace(' ', '_') + choices = list(description.attributechoice_set.all()) + + schema[key] = {k: v for k, v in [ + ('Levels', {c.id: c.label for c in choices}), + ('Description', description.help_text), + ('TermURL', description.url), + ] if v} + + return self._json_dumps(schema) + + def get_schema_filename(self): + return 'participants.json' + + def get_data(self, descriptions, subjects): + fieldnames = ['participant_id'] + for desc in descriptions: + key = desc.label.lower().replace(' ', '_') + fieldnames.append(key) + + fh = StringIO() + writer = csv.DictWriter(fh, fieldnames=fieldnames, dialect=csv.excel_tab) + writer.writeheader() + + for _id, subject in subjects: + row = {'participant_id': 'sub-%s' % _id} + for desc in descriptions: + key = desc.label.lower().replace(' ', '_') + value = subject.attributes.get(desc.json_key) + if value in [None, ANSWER_DECLINED]: + row[key] = 'n/a' + else: + row[key] = value + writer.writerow(row) + + return fh.getvalue() + + def get_data_filename(self): + return 'participants.tsv' diff --git a/castellum/settings/default.py b/castellum/settings/default.py index 715374211..8084c13de 100644 --- a/castellum/settings/default.py +++ b/castellum/settings/default.py @@ -420,6 +420,7 @@ CASTELLUM_GENERAL_EXCLUSION_CRITERIA = '' # Default exporter used for attributes. # Currently available options are # - 'castellum.recruitment.attribute_exporters.JSONExporter' +# - 'castellum.recruitment.attribute_exporters.BIDSExporter' # You can also implement your own if required. # See castellum/recruitment/attribute_exporters.py for details CASTELLUM_ATTRIBUTE_EXPORTER = 'castellum.recruitment.attribute_exporters.JSONExporter' -- GitLab From 069ed704dcb032ffaffd9d2fcd9c1e763ed464cc Mon Sep 17 00:00:00 2001 From: Tobias Bengfort Date: Tue, 24 Nov 2020 12:43:45 +0100 Subject: [PATCH 7/7] add tests --- tests/recruitment/test_attribute_exporters.py | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 tests/recruitment/test_attribute_exporters.py diff --git a/tests/recruitment/test_attribute_exporters.py b/tests/recruitment/test_attribute_exporters.py new file mode 100644 index 000000000..a8cb0ff55 --- /dev/null +++ b/tests/recruitment/test_attribute_exporters.py @@ -0,0 +1,111 @@ +from castellum.recruitment import attribute_exporters +from castellum.recruitment.models import AttributeDescription + +ATTRIBUTES = { + 'd1': 1, + 'd2': 'de', + 'd3': '1970-01-01', +} + +JSON_SCHEMA = """{ + "properties": { + "Date of birth": { + "format": "date", + "type": "string" + }, + "Handedness": { + "enum": [ + "Right", + "Left", + "Ambidextrous" + ], + "type": "string" + }, + "Highest degree": { + "enum": [ + "No degree", + "Elementary school", + "Hauptschule", + "Mittlere Reife", + "Abitur", + "Bachelor", + "Master" + ], + "type": "string" + }, + "Language": { + "type": "string" + }, + "id": { + "type": "string" + } + }, + "type": "object" +}""" + +JSON_DATA = """[ + { + "Date of birth": "1970-01-01", + "Handedness": "Right", + "Language": "de", + "id": "test" + } +]""" + +BIDS_SCHEMA = """{ + "date_of_birth": { + "TermURL": "http://purl.bioontology.org/ontology/SNOMEDCT/397669002" + }, + "handedness": { + "Levels": { + "1": "Right", + "2": "Left", + "3": "Ambidextrous" + }, + "TermURL": "http://purl.bioontology.org/ontology/SNOMEDCT/57427004" + }, + "highest_degree": { + "Levels": { + "4": "No degree", + "5": "Elementary school", + "6": "Hauptschule", + "7": "Mittlere Reife", + "8": "Abitur", + "9": "Bachelor", + "10": "Master" + } + }, + "language": {} +}""" + +BIDS_DATA = """participant_id\thandedness\tlanguage\tdate_of_birth\thighest_degree\r +sub-test\t1\tde\t1970-01-01\tn/a\r +""" + + +def test_json_schema(attribute_descriptions, db): + exporter = attribute_exporters.JSONExporter() + descriptions = AttributeDescription.objects.all() + assert exporter.get_schema(descriptions) == JSON_SCHEMA + + +def test_json_data(attribute_descriptions, contact): + exporter = attribute_exporters.JSONExporter() + descriptions = AttributeDescription.objects.all() + contact.subject.attributes = ATTRIBUTES + subjects = [('test', contact.subject)] + assert exporter.get_data(descriptions, subjects) == JSON_DATA + + +def test_bids_schema(attribute_descriptions, db): + exporter = attribute_exporters.BIDSExporter() + descriptions = AttributeDescription.objects.all() + assert exporter.get_schema(descriptions) == BIDS_SCHEMA + + +def test_bids_data(attribute_descriptions, contact): + exporter = attribute_exporters.BIDSExporter() + descriptions = AttributeDescription.objects.all() + contact.subject.attributes = ATTRIBUTES + subjects = [('test', contact.subject)] + assert exporter.get_data(descriptions, subjects) == BIDS_DATA -- GitLab