From 21ffc175e9cfad03d9d82deba0f84df2c32b9896 Mon Sep 17 00:00:00 2001 From: Wolfgang Gerlach Date: Fri, 23 Aug 2019 14:37:05 -0500 Subject: [PATCH 01/14] fix sprintf --- src/PPO/PPOBackend.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PPO/PPOBackend.pm b/src/PPO/PPOBackend.pm index e41b6789..c57cfeac 100644 --- a/src/PPO/PPOBackend.pm +++ b/src/PPO/PPOBackend.pm @@ -209,7 +209,7 @@ sub get_rows { ? ' DESC' : ' ASC'; } - my $statement = sprintf ("SELECT %s FROM %s%s%s", + my $statement = sprintf ("SELECT %s FROM %s%s%s%s", (@$fields) ? join (",", @$fields) : '*', $table, ($conditions) ? " WHERE $conditions" : '', From 3b0b4d737769677e611607b990431267bc855fec Mon Sep 17 00:00:00 2001 From: Will Trimble Date: Thu, 29 Aug 2019 14:21:41 -0500 Subject: [PATCH 02/14] better error passthrough for elasticsearch errors --- src/MGRAST/lib/resources/resource.pm | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/MGRAST/lib/resources/resource.pm b/src/MGRAST/lib/resources/resource.pm index bfc33d1f..5baca068 100644 --- a/src/MGRAST/lib/resources/resource.pm +++ b/src/MGRAST/lib/resources/resource.pm @@ -2415,19 +2415,29 @@ sub get_elastic_query { } my $content; + my $res; # need it here for error trapping eval { - my $res = $self->agent->post($server.'/_search', Content => $self->json->encode($postJSON)); + $res = $self->agent->post($server.'/_search', Content => $self->json->encode($postJSON), + "Content-Type", "application/json"); $content = $self->json->decode($res->content); }; if ($@ || (! 
ref($content))) { + # one of the likely things to get us here is that json->decode fails because error message, not json + $self->return_data( { "ERROR" => $res->content} , 500) ; return undef, $@; } elsif (exists $content->{error}) { - if (exists($content->{error}{type}) && exists($content->{error}{reason}) && exists($content->{status})) { - $self->return_data( {"ERROR" => $content->{error}{type}.": ".$content->{error}{reason}}, $content->{status} ); - } else { - $self->return_data( {"ERROR" => "Invalid Elastic Search return response"}, 500 ); - } - } else { + if (ref $content->{error}) { + eval { + if (exists($content->{error}{type}) && exists($content->{error}{reason}) && exists($content->{status})) { + $self->return_data( {"ERROR" => $content->{error}{type}.": ".$content->{error}{reason}.$res->$content}, $content->{status} ); + } else { + $self->return_data( {"ERROR" => "Invalid Elastic Search return response: $res"}, 500 ); + } + # fallback -- if error, but eval aborts + $self->return_data( { "ERROR" => $res->content }, 500) ; } + } else { # content->error is not ref + $self->return_data( { "ERROR" => "ES response: $content->{error}" }, 500) ; } + } else { #no error return $content; } } From 83f1abaeb0ead9667a4446711e5f6d9a449a1770 Mon Sep 17 00:00:00 2001 From: Will Trimble Date: Thu, 7 Nov 2019 09:42:14 -0600 Subject: [PATCH 03/14] Fix one-character SHOCK syntax error causing bad download filenames --- src/MGRAST/lib/resources/resource.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/MGRAST/lib/resources/resource.pm b/src/MGRAST/lib/resources/resource.pm index 5baca068..a5ad047d 100644 --- a/src/MGRAST/lib/resources/resource.pm +++ b/src/MGRAST/lib/resources/resource.pm @@ -1369,7 +1369,7 @@ sub get_shock_preauth { my $response = undef; eval { my @args = $auth ? ('Authorization', "$authPrefix $auth") : (); - my $get = $self->agent->get($Conf::shock_url.'/node/'.$id.'?download_url'.($fn ? 
"&filename=".$fn : ""), @args); + my $get = $self->agent->get($Conf::shock_url.'/node/'.$id.'?download_url'.($fn ? "&file_name=".$fn : ""), @args); $response = $self->json->decode( $get->content ); }; if ($@ || (! ref($response))) { From 28c732ed535f5a2954dd0267164be486103c50c0 Mon Sep 17 00:00:00 2001 From: Wolfgang Gerlach Date: Fri, 8 Nov 2019 13:05:49 -0600 Subject: [PATCH 04/14] added project_tags and sample_tags --- src/MGRAST/lib/ElasticSearch.pm | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/MGRAST/lib/ElasticSearch.pm b/src/MGRAST/lib/ElasticSearch.pm index f496e684..de3eabd0 100644 --- a/src/MGRAST/lib/ElasticSearch.pm +++ b/src/MGRAST/lib/ElasticSearch.pm @@ -129,6 +129,8 @@ our $fields = { filter_ambig => 'pipeline_parameters_filter_ambig', filter_ln => 'pipeline_parameters_filter_ln', bowtie => 'pipeline_parameters_bowtie', + project_tags => 'project_tags', + sample_tags => 'sample_tags', }; our $prefixes = { @@ -163,6 +165,7 @@ our $prefixes = { 'project_funding', 'project_name', 'project_id', + 'tags', ], 'sample_' => [ 'biome_id', @@ -191,6 +194,7 @@ our $prefixes = { 'feature', 'metagenome_taxonomy_id', 'env_package', + 'tags', ], 'env_package_' => [ 'env_package_id', @@ -321,6 +325,8 @@ our $types = { filter_ambig => 'boolean', filter_ln => 'boolean', bowtie => 'boolean', + project_tags => 'keyword', + sample_tags => 'keyword', }; our $taxa_num = [ From 38b60df1bdf3ed63c700a8299c18e516005cfbf4 Mon Sep 17 00:00:00 2001 From: Wolfgang Gerlach Date: Mon, 11 Nov 2019 14:06:58 -0600 Subject: [PATCH 05/14] introduce type arrayKeyword for tags --- src/MGRAST/lib/ElasticSearch.pm | 4 ++-- src/MGRAST/lib/resources/resource.pm | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/MGRAST/lib/ElasticSearch.pm b/src/MGRAST/lib/ElasticSearch.pm index de3eabd0..136a9d40 100644 --- a/src/MGRAST/lib/ElasticSearch.pm +++ b/src/MGRAST/lib/ElasticSearch.pm @@ -325,8 +325,8 @@ our $types = { filter_ambig => 'boolean', filter_ln => 
'boolean', bowtie => 'boolean', - project_tags => 'keyword', - sample_tags => 'keyword', + project_tags => 'arrayKeyword', + sample_tags => 'arrayKeyword', }; our $taxa_num = [ diff --git a/src/MGRAST/lib/resources/resource.pm b/src/MGRAST/lib/resources/resource.pm index a5ad047d..fd804a52 100644 --- a/src/MGRAST/lib/resources/resource.pm +++ b/src/MGRAST/lib/resources/resource.pm @@ -3448,6 +3448,8 @@ sub jsonTypecast { $val =~ s/^\s+//; $val =~ s/\s+$//; $val =~ s/\s+/ /g; + } elsif ( $type eq 'arrayKeyword' ) { + $val = \split(';', $val); } elsif (($type eq 'integer') || ($type eq 'long')) { if ($val =~ /^[+-]?\d+$/) { $val = int($val); From a9fd6bd778546c15ad8245282733be24d0684a41 Mon Sep 17 00:00:00 2001 From: Wolfgang Gerlach Date: Mon, 11 Nov 2019 14:41:03 -0600 Subject: [PATCH 06/14] try arrayText --- src/MGRAST/lib/ElasticSearch.pm | 4 ++-- src/MGRAST/lib/resources/resource.pm | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/MGRAST/lib/ElasticSearch.pm b/src/MGRAST/lib/ElasticSearch.pm index 136a9d40..92bc6877 100644 --- a/src/MGRAST/lib/ElasticSearch.pm +++ b/src/MGRAST/lib/ElasticSearch.pm @@ -325,8 +325,8 @@ our $types = { filter_ambig => 'boolean', filter_ln => 'boolean', bowtie => 'boolean', - project_tags => 'arrayKeyword', - sample_tags => 'arrayKeyword', + project_tags => 'arrayText', + sample_tags => 'arrayText', }; our $taxa_num = [ diff --git a/src/MGRAST/lib/resources/resource.pm b/src/MGRAST/lib/resources/resource.pm index fd804a52..29fc4e7b 100644 --- a/src/MGRAST/lib/resources/resource.pm +++ b/src/MGRAST/lib/resources/resource.pm @@ -3448,8 +3448,8 @@ sub jsonTypecast { $val =~ s/^\s+//; $val =~ s/\s+$//; $val =~ s/\s+/ /g; - } elsif ( $type eq 'arrayKeyword' ) { - $val = \split(';', $val); + } elsif ( $type eq 'arrayText' ) { + return split(';', $val); } elsif (($type eq 'integer') || ($type eq 'long')) { if ($val =~ /^[+-]?\d+$/) { $val = int($val); From bded19fcef9b93ba938bd1c20a53a0f18977e2bb Mon Sep 17 
00:00:00 2001 From: Wolfgang Gerlach Date: Mon, 11 Nov 2019 16:35:48 -0600 Subject: [PATCH 07/14] fix array --- src/MGRAST/lib/resources/resource.pm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/MGRAST/lib/resources/resource.pm b/src/MGRAST/lib/resources/resource.pm index 29fc4e7b..54d8f770 100644 --- a/src/MGRAST/lib/resources/resource.pm +++ b/src/MGRAST/lib/resources/resource.pm @@ -3449,7 +3449,9 @@ sub jsonTypecast { $val =~ s/\s+$//; $val =~ s/\s+/ /g; } elsif ( $type eq 'arrayText' ) { - return split(';', $val); + my @array = split(';', $val); + $val = \@array; + } elsif (($type eq 'integer') || ($type eq 'long')) { if ($val =~ /^[+-]?\d+$/) { $val = int($val); From 1098d1d337d0290e70ed2c3f840044ff17270d56 Mon Sep 17 00:00:00 2001 From: Andreas Wilke Date: Fri, 7 Feb 2020 11:42:43 -0600 Subject: [PATCH 08/14] Added support for tags --- src/MGRAST/lib/resources/search.pm | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/MGRAST/lib/resources/search.pm b/src/MGRAST/lib/resources/search.pm index 80201128..817ff272 100644 --- a/src/MGRAST/lib/resources/search.pm +++ b/src/MGRAST/lib/resources/search.pm @@ -243,10 +243,18 @@ sub query { $query =~ s/^\s+|\s+$//g; $query =~ s/\s+/ /g; # remove specified fields (non-escaped ':'), only using set default - if (($query =~ /:/) && ($query !~ /\\:/)) { + # if (($query =~ /:/) && ($query !~ /\\:/)) { + # my @parts = split(/:/, $query); + # $query = join(" ", @parts[1..$#parts]); + # } + # remove specified fields (non-escaped ':'), only using set default + if (( $field !~ /_tag/ ) && ($query =~ /:/) && ($query !~ /\\:/)) { my @parts = split(/:/, $query); $query = join(" ", @parts[1..$#parts]); } + else { + $query=~s/\:/\\\:/g + } push @$queries, {"field" => $key, "query" => $query, "type" => $type}; } } From 299b8a076c9f374defdaf1f7dd8ca38bce278856 Mon Sep 17 00:00:00 2001 From: Will Trimble Date: Wed, 27 May 2020 14:34:51 -0500 Subject: [PATCH 09/14] Disable ftp 
heartbeat --- src/MGRAST/lib/resources/heartbeat.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/MGRAST/lib/resources/heartbeat.pm b/src/MGRAST/lib/resources/heartbeat.pm index 3b2a69fd..de3da6a1 100644 --- a/src/MGRAST/lib/resources/heartbeat.pm +++ b/src/MGRAST/lib/resources/heartbeat.pm @@ -18,7 +18,7 @@ sub new { $self->{name} = "heartbeat"; $self->{m5nr_version} = 1; $self->{services} = { - 'FTP' => 'ftp://'.$Conf::ftp_download, +# 'FTP' => 'ftp://'.$Conf::ftp_download, 'website' => $Conf::web_site, 'SHOCK' => $Conf::shock_url, 'SHOCKDB' => 'mongo', @@ -30,7 +30,7 @@ sub new { 'cassandra' => $Conf::cassandra_m5nr }; $self->{attributes} = { "service" => [ 'string', "cv", [ - ['FTP', 'file server'], +# ['FTP', 'file server'], ['website', 'MG-RAST website'], ['SHOCK', 'object storage'], ['SHOCKDB', 'object storage mongodb'], From 2a0e76207cbee1e3e6d573f913f80e210e98f945 Mon Sep 17 00:00:00 2001 From: Will Trimble Date: Wed, 19 Feb 2020 12:44:15 -0600 Subject: [PATCH 10/14] Preserve errors for failed PUT --- src/MGRAST/lib/resources/resource.pm | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/MGRAST/lib/resources/resource.pm b/src/MGRAST/lib/resources/resource.pm index 54d8f770..afea9bbb 100644 --- a/src/MGRAST/lib/resources/resource.pm +++ b/src/MGRAST/lib/resources/resource.pm @@ -2277,13 +2277,12 @@ sub upsert_to_elasticsearch_annotation { $self->json->utf8(); my $success = {}; - # PUT docuemnt(s) + # PUT document(s) foreach my $key (keys %$results) { if ($results->{$key}) { my $entry = $self->json->encode($results->{$key}); my $esurl = $Conf::es_host."/$index/$key/$mgid?parent=$mgid"; my $response = undef; - eval { my @args = ( 'Content_Type', 'application/json', 'Content', $entry @@ -2291,10 +2290,12 @@ sub upsert_to_elasticsearch_annotation { my $req = POST($esurl, @args); $req->method('PUT'); my $put = $self->agent->request($req); + $response = $put->content; # If it's an error message, preserve it. 
+ eval { $response = $self->json->decode($put->content); }; if ($@ || (! ref($response)) || $response->{error} || (! $response->{result})) { - $success->{$key} = "failed"; + $success->{$key} = "failed: $response"; } $success->{$key} = "updated"; } else { From b596e3bde3a51aa08a49f47093de881bc20a2b38 Mon Sep 17 00:00:00 2001 From: Will Trimble Date: Fri, 18 Sep 2020 13:44:54 -0500 Subject: [PATCH 11/14] Trap errors retrieving permissions during auth --- src/MGRAST/lib/Auth.pm | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/MGRAST/lib/Auth.pm b/src/MGRAST/lib/Auth.pm index af20150f..0387e69d 100644 --- a/src/MGRAST/lib/Auth.pm +++ b/src/MGRAST/lib/Auth.pm @@ -89,7 +89,10 @@ sub authenticate { } elsif (exists($response->{error}) && $response->{error}) { $verbose.=', "preferences": "ERROR - '.$response->{error}.'"'; } else { + $verbose.=', "preferences": "" '; + eval{ # Users with corrupt preferences were being denied access here (mgu21652) $verbose.=', "preferences": '.$json->encode($response->{data}->{attributes}->{pref}); + } } } if ($us->has_right(undef, 'edit', 'user', '*')) { From 0970cce91a1388562ee49aa116e09f3b8e4c3d6c Mon Sep 17 00:00:00 2001 From: Will Trimble Date: Fri, 5 Mar 2021 12:05:07 -0600 Subject: [PATCH 12/14] Cleanup of bad entries in cassandra --- src/MGRAST/pylib/mgrast_cassandra.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/MGRAST/pylib/mgrast_cassandra.py b/src/MGRAST/pylib/mgrast_cassandra.py index 3208c485..aa84cb10 100644 --- a/src/MGRAST/pylib/mgrast_cassandra.py +++ b/src/MGRAST/pylib/mgrast_cassandra.py @@ -246,7 +246,7 @@ def get_md5_records(self, job, swap=None, md5s=None, evalue=None, identity=None, rmqLogger(self.channel, 'select', query) rows = self.session.execute(query) for r in rows: - if r[1] == 0: + if r[1] == 0 or r[0] is None: # skip row if zero length, or row is corrupt continue pos = bisect.bisect(found, (r[0], None)) if (pos > 0) and ((found[pos-1][0] + found[pos-1][1]) == r[0]): From 
915fe58e6bbf4044302374a223b31232cc92d6ab Mon Sep 17 00:00:00 2001 From: Will Trimble Date: Sun, 7 Mar 2021 14:46:52 -0600 Subject: [PATCH 13/14] Pin version of xlrd old excel library --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index a4fcc566..5177de19 100755 --- a/Dockerfile +++ b/Dockerfile @@ -45,7 +45,7 @@ RUN apt-get install -y python-dev python-pip && \ pip install \ openpyxl \ gspread \ - xlrd \ + xlrd==1.2.0 \ lepl \ requests_toolbelt \ cassandra-driver \ From f954ce4103350afdb51a2796b27b1cc2fb84478f Mon Sep 17 00:00:00 2001 From: Will Trimble Date: Fri, 12 Mar 2021 14:26:33 -0600 Subject: [PATCH 14/14] Fixed xlsx parser.. restored try..except for xls fallback to xlsx --- src/MGRAST/bin/validate_metadata | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/MGRAST/bin/validate_metadata b/src/MGRAST/bin/validate_metadata index 90276a88..0a759d6c 100755 --- a/src/MGRAST/bin/validate_metadata +++ b/src/MGRAST/bin/validate_metadata @@ -285,8 +285,8 @@ def google_to_mdraw(url, dupe, inc, login, password): return tables_to_mdraw(tables, dupe) def xlsx_to_mdraw(xxfile, dupe, inc): - xxbook = load_workbook(filename = xxfile) - tables = dict([(x, sheet_to_table(xxbook[x], 'xlsx')) for x in xxbook.sheetnames()]) + xxbook = load_workbook(filename = xxfile, data_only="True") + tables = dict([(x, sheet_to_table(xxbook[x], 'xlsx')) for x in xxbook.sheetnames]) if inc: return tables_to_mdraw_inc(tables, dupe) else: @@ -307,9 +307,18 @@ def sheet_to_table(sheet, source): maxCol = sheet.max_column for r in range(maxRow): row = [] - for c in range(maxCol): - row.append( sheet.cell(row=r,column=c).value ) - table.append(row) + rowcontents = [ sheet.cell(row=r+1, column=i+1).value for i in range(maxCol)] + # do not add empty rows, unless there is only one column + if not all([ element is None for element in rowcontents] ) or maxCol == 1 : + for c in range(maxCol): + value = 
sheet.cell(row=r+1,column=c+1).value + # sanitize None values to empty strings + if value is None: + row.append("") + else: + row.append( value ) + table.append(row) + if source == 'xls': maxRow = sheet.nrows maxCol = sheet.ncols @@ -691,7 +700,7 @@ def main(args): try: mdraw = xls_to_mdraw(md_source, opts.dupe, opts.inc) except: - errorMsg('Invald file format: '+md_source, 1) + errorMsg('Error loading xlsx, xls file: '+md_source, 1) elif opts.format == 'xls': try: mdraw = xls_to_mdraw(md_source, opts.dupe, opts.inc) @@ -699,7 +708,7 @@ def main(args): try: mdraw = xlsx_to_mdraw(md_source, opts.dupe, opts.inc) except: - errorMsg('Invald file format: '+md_source, 1) + errorMsg('Error loading xls, xlsx file: '+md_source, 1) elif opts.format == 'google': mdraw = google_to_mdraw(md_source, opts.dupe, opts.inc, opts.login, opts.password) #pprint.pprint(mdraw)