The script below checks the values and value attributes of a variable to confirm that it is as expected. It can be used in a custom QScript automation for Data Cleaning. It provides you with a report that shows OK if all validations were met, or otherwise a list of the variables that failed their validations.
Technical details
The script below creates a list of variables and validations to run on them to confirm if they are valid. It is example code that you can modify with your own variable names and conditions based upon your specific project. It does not fix any invalid data, but can:
- Check that variables contain responses in the correct range.
- Identify Categorical Variables that do not have value labels.
Method
If using Q, this example can be run in C:\Program Files\Q\Examples\phone.sav (this may be located on a different place on your computer depending upon how Q was installed).
var id = "id"; //name of the unique id variable
var show_ids = true;
var conditions_to_check = []; //create empty list of variables and conditions
//// EDIT list of variables and validations to check below use the examples to build your own
conditions_to_check.push(['q1',[1,2],""]); //checks to see if q1 contains values of 1 or 2
conditions_to_check.push(['q2',[],"true"]); //checks to see if q2 contains any values which have no labels + checks that all respondents have data
conditions_to_check.push(['q3',[],"q2 >= 5 && q2 <= 7"]); //checks for values without labels + checks to see if data is inconsistent with filter of "q2 >= 5 && q2 <= 7"
conditions_to_check.push(['q4',[],"true"]); //checks for values without labels + checks to see if data is inconsistent with filter of "q2 >= 5 && q2 <= 7"
conditions_to_check.push(['q5',[],"true"]); //checks for values without labels + checks to see if data is inconsistent with filter of "q2 >= 5 && q2 <= 7"
conditions_to_check.push(['Q5_1',[0,1],"true"]); //checks for values of 0 and 1 + checks to see if respondent has data
conditions_to_check.push(['Q5_2',[0,1],"true"]);
conditions_to_check.push(['Q5_3',[0,1],"true"]);
conditions_to_check.push(['Q5_4',[0,1],"true"]);
conditions_to_check.push(['Q5_5',[0,1],"true"]);
conditions_to_check.push(['Q5_6',[0,1],"true"]);
conditions_to_check.push(['Q5_7',[0,1],"true"]);
conditions_to_check.push(['Q5_8',[0,1],"true"]);
conditions_to_check.push(['Q5_9',[0,1],"true"]);
conditions_to_check.push(['Q5_10',[0,1],"true"]);
conditions_to_check.push(['Q5_11',[0,1],"true"]);
conditions_to_check.push(['Q6_1',[0,1],"Q5_1 == 0"]); // checks for values of 0 and 1 + checks that only asked to people with missing data in q5_1
conditions_to_check.push(['Q6_2',[0,1],"Q5_2 == 0"]);
if (project.dataFiles.length != 1)
alert("Warning: multiple data files exist in this project; only the first is being examined.");
var data = project.dataFiles[0];
var n_conditions = conditions_to_check.length;
//for (var data_i in project.dataFiles) {
// var data = project.dataFiles[data_i];
var invalid_values_prefix = "Invalid values: ";
var report = "Data cleaning report (NB: this report gives information for use in data cleaning but does not perform any cleaning)\r\n";
for (var condition=0; condition<n_conditions; condition++){
var details = conditions_to_check[condition];
var name = details[0];
var results = "";
var acceptable_values = details[1]
var check_using_labels = acceptable_values.length == 0;
var variable = data.getVariableByName(name).duplicate();
var label = variable.label;
var values = variable.uniqueValues;
var invalid_values_message = invalid_values_prefix;
//checking for out of range value
for (var i=0; i<values.length; i++){
var v = values[i];
if (!isNaN(v)){
var vstring = v.toString();
if (check_using_labels ? vstring == variable.valueAttributes.getLabel(v) : acceptable_values.indexOf(parseInt(v)) == -1)
invalid_values_message += vstring + " ";
}
}
variable.deleteVariable();
results += (invalid_values_message.length == invalid_values_prefix.length ? "" : invalid_values_message);
//checking skips
var filter_expression = details[2];
var t = project.report.appendTable();
if (filter_expression != ""){
//missing data
var filter_expression = details[2];
var temp_name = "Dummy5435643";
var v = data.getVariableByName(temp_name);
if (v != null)
v.deleteVariable();
var v = data.newJavaScriptVariable("(" + filter_expression + ") && isNaN(" + name + ")", false, temp_name, temp_name, null);
var q = project.dataFiles[0].getQuestionByName("Dummy5435643")
q.isFilter = true;
t.primary = data.getVariableByName(id).question;
t.secondary = "RAW DATA";
t.filters = [v];
var output = t.calculateOutput()
var ids = output.get('Values')
if (ids.length > 0){
var invalid_ids = "Missing data (n=" + ids.length + ") ";
if (show_ids){
invalid_ids += ": ";
for (var c=0; c<ids.length ; c++)
invalid_ids += ids[c][0] + " " ;
}
results += invalid_ids;
}
v.deleteVariable();
//Excess data
var v = data.newJavaScriptVariable("(!(" + filter_expression + ") && !isNaN(" + name + "))", false, temp_name, temp_name, null);
var q = project.dataFiles[0].getQuestionByName("Dummy5435643")
q.isFilter = true;
t.primary = data.getVariableByName(id).question;
t.secondary = "RAW DATA";
t.filters = [v];
var output = t.calculateOutput()
var ids = output.get('Values')
if (ids.length > 0){
var invalid_ids = "Excess data (n=" + ids.length + ") ";
if (show_ids){
invalid_ids += ": ";
for (var c=0; c<ids.length ; c++)
invalid_ids += ids[c][0] + " " ;
}
results += invalid_ids;
}
v.deleteVariable();
}
var res = name ;
for (var c=name.length; c<20; c++)
res = res + " ";
report += "\r\n" + res + (results.length == "" ? "OK" : results);
t.deleteItem();
}
log(report + '\r\n\r\nFinished!');
See also
- QScript for an explanation of how to run this code.
- QScript Reference for technical information.
- JavaScript for information about the JavaScript programming language.