diff --git a/Dockerfile b/Dockerfile index a4fcc566..5177de19 100755 --- a/Dockerfile +++ b/Dockerfile @@ -45,7 +45,7 @@ RUN apt-get install -y python-dev python-pip && \ pip install \ openpyxl \ gspread \ - xlrd \ + xlrd==1.2.0 \ lepl \ requests_toolbelt \ cassandra-driver \ diff --git a/src/MGRAST/bin/validate_metadata b/src/MGRAST/bin/validate_metadata index 90276a88..0a759d6c 100755 --- a/src/MGRAST/bin/validate_metadata +++ b/src/MGRAST/bin/validate_metadata @@ -285,8 +285,8 @@ def google_to_mdraw(url, dupe, inc, login, password): return tables_to_mdraw(tables, dupe) def xlsx_to_mdraw(xxfile, dupe, inc): - xxbook = load_workbook(filename = xxfile) - tables = dict([(x, sheet_to_table(xxbook[x], 'xlsx')) for x in xxbook.sheetnames()]) + xxbook = load_workbook(filename = xxfile, data_only="True") + tables = dict([(x, sheet_to_table(xxbook[x], 'xlsx')) for x in xxbook.sheetnames]) if inc: return tables_to_mdraw_inc(tables, dupe) else: @@ -307,9 +307,18 @@ def sheet_to_table(sheet, source): maxCol = sheet.max_column for r in range(maxRow): row = [] - for c in range(maxCol): - row.append( sheet.cell(row=r,column=c).value ) - table.append(row) + rowcontents = [ sheet.cell(row=r+1, column=i+1).value for i in range(maxCol)] + # do not add empty rows, unless there is only one column + if not all([ element is None for element in rowcontents] ) or maxCol == 1 : + for c in range(maxCol): + value = sheet.cell(row=r+1,column=c+1).value + # sanitize None values to empty strings + if value is None: + row.append("") + else: + row.append( value ) + table.append(row) + if source == 'xls': maxRow = sheet.nrows maxCol = sheet.ncols @@ -691,7 +700,7 @@ def main(args): try: mdraw = xls_to_mdraw(md_source, opts.dupe, opts.inc) except: - errorMsg('Invald file format: '+md_source, 1) + errorMsg('Error loading xlsx, xls file: '+md_source, 1) elif opts.format == 'xls': try: mdraw = xls_to_mdraw(md_source, opts.dupe, opts.inc) @@ -699,7 +708,7 @@ def main(args): try: mdraw = xlsx_to_mdraw(md_source, opts.dupe, opts.inc) except: - errorMsg('Invald file format: '+md_source, 1) + errorMsg('Error loading xls, xlsx file: '+md_source, 1) elif opts.format == 'google': mdraw = google_to_mdraw(md_source, opts.dupe, opts.inc, opts.login, opts.password) #pprint.pprint(mdraw) diff --git a/src/MGRAST/lib/Auth.pm b/src/MGRAST/lib/Auth.pm index af20150f..0387e69d 100644 --- a/src/MGRAST/lib/Auth.pm +++ b/src/MGRAST/lib/Auth.pm @@ -89,7 +89,10 @@ sub authenticate { } elsif (exists($response->{error}) && $response->{error}) { $verbose.=', "preferences": "ERROR - '.$response->{error}.'"'; } else { + $verbose.=', "preferences": "" '; + eval{ # Users with corrupt preferences were being denied access here (mgu21652) $verbose.=', "preferences": '.$json->encode($response->{data}->{attributes}->{pref}); + } } } if ($us->has_right(undef, 'edit', 'user', '*')) { diff --git a/src/MGRAST/lib/ElasticSearch.pm b/src/MGRAST/lib/ElasticSearch.pm index f496e684..92bc6877 100644 --- a/src/MGRAST/lib/ElasticSearch.pm +++ b/src/MGRAST/lib/ElasticSearch.pm @@ -129,6 +129,8 @@ our $fields = { filter_ambig => 'pipeline_parameters_filter_ambig', filter_ln => 'pipeline_parameters_filter_ln', bowtie => 'pipeline_parameters_bowtie', + project_tags => 'project_tags', + sample_tags => 'sample_tags', }; our $prefixes = { @@ -163,6 +165,7 @@ our $prefixes = { 'project_funding', 'project_name', 'project_id', + 'tags', ], 'sample_' => [ 'biome_id', @@ -191,6 +194,7 @@ our $prefixes = { 'feature', 'metagenome_taxonomy_id', 'env_package', + 'tags', ], 'env_package_' => [ 'env_package_id', @@ -321,6 +325,8 @@ our $types = { filter_ambig => 'boolean', filter_ln => 'boolean', bowtie => 'boolean', + project_tags => 'arrayText', + sample_tags => 'arrayText', }; our $taxa_num = [ diff --git a/src/MGRAST/lib/resources/heartbeat.pm b/src/MGRAST/lib/resources/heartbeat.pm index 3b2a69fd..de3da6a1 100644 --- a/src/MGRAST/lib/resources/heartbeat.pm +++ b/src/MGRAST/lib/resources/heartbeat.pm @@ -18,7 +18,7 @@ sub new { $self->{name} = "heartbeat"; $self->{m5nr_version} = 1; $self->{services} = { - 'FTP' => 'ftp://'.$Conf::ftp_download, +# 'FTP' => 'ftp://'.$Conf::ftp_download, 'website' => $Conf::web_site, 'SHOCK' => $Conf::shock_url, 'SHOCKDB' => 'mongo', @@ -30,7 +30,7 @@ sub new { 'cassandra' => $Conf::cassandra_m5nr }; $self->{attributes} = { "service" => [ 'string', "cv", [ - ['FTP', 'file server'], +# ['FTP', 'file server'], ['website', 'MG-RAST website'], ['SHOCK', 'object storage'], ['SHOCKDB', 'object storage mongodb'], diff --git a/src/MGRAST/lib/resources/resource.pm b/src/MGRAST/lib/resources/resource.pm index 936466e5..3071695f 100644 --- a/src/MGRAST/lib/resources/resource.pm +++ b/src/MGRAST/lib/resources/resource.pm @@ -1369,7 +1369,7 @@ sub get_shock_preauth { my $response = undef; eval { my @args = $auth ? ('Authorization', "$authPrefix $auth") : (); - my $get = $self->agent->get($Conf::shock_url.'/node/'.$id.'?download_url'.($fn ? "&filename=".$fn : ""), @args); + my $get = $self->agent->get($Conf::shock_url.'/node/'.$id.'?download_url'.($fn ? "&file_name=".$fn : ""), @args); $response = $self->json->decode( $get->content ); }; if ($@ || (! ref($response))) { @@ -2277,13 +2277,12 @@ sub upsert_to_elasticsearch_annotation { $self->json->utf8(); my $success = {}; - # PUT docuemnt(s) + # PUT document(s) foreach my $key (keys %$results) { if ($results->{$key}) { my $entry = $self->json->encode($results->{$key}); my $esurl = $Conf::es_host."/$index/$key/$mgid?parent=$mgid"; my $response = undef; - eval { my @args = ( 'Content_Type', 'application/json', 'Content', $entry @@ -2291,10 +2290,12 @@ sub upsert_to_elasticsearch_annotation { my $req = POST($esurl, @args); $req->method('PUT'); my $put = $self->agent->request($req); + $response = $put->content; # If it's an error message, preserve it. + eval { $response = $self->json->decode($put->content); }; if ($@ || (! ref($response)) || $response->{error} || (! $response->{result})) { - $success->{$key} = "failed"; + $success->{$key} = "failed: $response"; } $success->{$key} = "updated"; } else { @@ -2415,19 +2416,29 @@ sub get_elastic_query { } my $content; + my $res; # need it here for error trapping eval { - my $res = $self->agent->post($server.'/_search', Content => $self->json->encode($postJSON)); + $res = $self->agent->post($server.'/_search', Content => $self->json->encode($postJSON), + "Content-Type", "application/json"); $content = $self->json->decode($res->content); }; if ($@ || (! ref($content))) { + # one of the likely things to get us here is that json->decode fails because error message, not json + $self->return_data( { "ERROR" => $res->content} , 500) ; return undef, $@; } elsif (exists $content->{error}) { - if (exists($content->{error}{type}) && exists($content->{error}{reason}) && exists($content->{status})) { - $self->return_data( {"ERROR" => $content->{error}{type}.": ".$content->{error}{reason}}, $content->{status} ); - } else { - $self->return_data( {"ERROR" => "Invalid Elastic Search return response"}, 500 ); - } - } else { + if (ref $content->{error}) { + eval { + if (exists($content->{error}{type}) && exists($content->{error}{reason}) && exists($content->{status})) { + $self->return_data( {"ERROR" => $content->{error}{type}.": ".$content->{error}{reason}.$res->$content}, $content->{status} ); + } else { + $self->return_data( {"ERROR" => "Invalid Elastic Search return response: $res"}, 500 ); + } + # fallback -- if error, but eval aborts + $self->return_data( { "ERROR" => $res->content }, 500) ; } + } else { # content->error is not ref + $self->return_data( { "ERROR" => "ES response: $content->{error}" }, 500) ; } + } else { #no error return $content; } } @@ -3438,6 +3449,10 @@ sub jsonTypecast { $val =~ s/^\s+//; $val =~ s/\s+$//; $val =~ s/\s+/ /g; + } elsif ( $type eq 'arrayText' ) { + my @array = split(';', $val); + $val = \@array; + } elsif (($type eq 'integer') || ($type eq 'long')) { if ($val =~ /^[+-]?\d+$/) { $val = int($val); diff --git a/src/MGRAST/lib/resources/search.pm b/src/MGRAST/lib/resources/search.pm index 80201128..817ff272 100644 --- a/src/MGRAST/lib/resources/search.pm +++ b/src/MGRAST/lib/resources/search.pm @@ -243,10 +243,18 @@ sub query { $query =~ s/^\s+|\s+$//g; $query =~ s/\s+/ /g; # remove specified fields (non-escaped ':'), only using set default - if (($query =~ /:/) && ($query !~ /\\:/)) { + # if (($query =~ /:/) && ($query !~ /\\:/)) { + # my @parts = split(/:/, $query); + # $query = join(" ", @parts[1..$#parts]); + # } + # remove specified fields (non-escaped ':'), only using set default + if (( $field !~ /_tag/ ) && ($query =~ /:/) && ($query !~ /\\:/)) { my @parts = split(/:/, $query); $query = join(" ", @parts[1..$#parts]); } + else { + $query=~s/\:/\\\:/g + } push @$queries, {"field" => $key, "query" => $query, "type" => $type}; } } diff --git a/src/MGRAST/pylib/mgrast_cassandra.py b/src/MGRAST/pylib/mgrast_cassandra.py index 3208c485..aa84cb10 100644 --- a/src/MGRAST/pylib/mgrast_cassandra.py +++ b/src/MGRAST/pylib/mgrast_cassandra.py @@ -246,7 +246,7 @@ def get_md5_records(self, job, swap=None, md5s=None, evalue=None, identity=None, rmqLogger(self.channel, 'select', query) rows = self.session.execute(query) for r in rows: - if r[1] == 0: + if r[1] == 0 or r[0] is None: # skip row if zero length, or row is corrupt continue pos = bisect.bisect(found, (r[0], None)) if (pos > 0) and ((found[pos-1][0] + found[pos-1][1]) == r[0]): diff --git a/src/PPO/PPOBackend.pm b/src/PPO/PPOBackend.pm index e41b6789..c57cfeac 100644 --- a/src/PPO/PPOBackend.pm +++ b/src/PPO/PPOBackend.pm @@ -209,7 +209,7 @@ sub get_rows { ? ' DESC' : ' ASC'; } - my $statement = sprintf ("SELECT %s FROM %s%s%s", + my $statement = sprintf ("SELECT %s FROM %s%s%s%s", (@$fields) ? join (",", @$fields) : '*', $table, ($conditions) ? " WHERE $conditions" : '',