1. Get all steps in the pangenome build, with their start and end positions

    # Lists every vg:Step with its FALDO begin/end position nodes and the path
    # that the end position references.
    # Prefixes (vg:, faldo:) are not declared here; presumably the endpoint
    # predefines them.
    # NOTE(review): ?begin and ?end (the faldo:position values) are bound but
    # not projected - confirm whether they should be added to the SELECT list.
    SELECT ?step ?bp ?ep ?path
    WHERE {
      ?step a vg:Step ;
            faldo:begin ?bp ;
            faldo:end ?ep .
      ?bp a faldo:Position ;
          faldo:position ?begin .
      ?ep a faldo:Position ;
          faldo:reference ?path ;
          faldo:position ?end .
    }
  2. Get UniProt proteins linked to a vg:Path in this graph

    # Pairs each vg:Path with the UniProt entries whose rdfs:seeAlso
    # cross-reference (?embl) is located on something owl:sameAs that path.
    # Prefixes (vg:, up:, owl:, rdfs:) are not declared here; presumably the
    # endpoint predefines them.
    SELECT DISTINCT ?uniprot ?path
    WHERE {
      ?path a vg:Path .
      ?embl up:locatedOn/owl:sameAs ?path .
      ?uniprot rdfs:seeAlso ?embl .
    }
  3. For the exons annotated in the INSDC records, show the nodes in the pangenome graph that overlap them

    # For exon annotations (insdcschema:Exon), returns the nodes of steps whose
    # begin/end positions fall inside the exon region's begin/end positions.
    # Prefixes are not declared here; presumably the endpoint predefines them.
    SELECT DISTINCT ?node ?annotation ?sbpValue ?bpValue ?sepValue ?epValue
    WHERE {
      # Paths that have a close-matching genome carrying a UniProt-linked record.
      ?path a vg:Path ;
            skos:closeMatch ?genome .
      ?uniprot rdfs:seeAlso ?emblcds .
      ?emblcds up:locatedOn/owl:sameAs ?genome .

      # Exon regions with begin and end on the same reference ?p.
      # NOTE(review): ?p is not joined to ?genome or ?path here - confirm whether
      # the exon is meant to be restricted to the genome matched above.
      ?region faldo:begin ?bp ;
              faldo:end ?ep .
      ?bp faldo:reference ?p ;
          faldo:position ?bpValue .
      ?ep faldo:reference ?p ;
          faldo:position ?epValue .
      ?annotation faldo:location ?region ;
                  a insdcschema:Exon .

      # Drop regions that are themselves steps.
      MINUS { ?region a vg:Step }

      # Steps on the path, with their begin/end offsets.
      ?step a vg:Step ;
            vg:path ?path ;
            vg:node ?node ;
            faldo:begin/faldo:position ?sbpValue ;
            faldo:end/faldo:position ?sepValue .

      # Keep steps fully contained in the exon region.
      FILTER((?bpValue <= ?sbpValue) && (?epValue >= ?sepValue))
    }
  4. Find annotations on a CDS that differs by one step in pangenome node space from another CDS, at the places where the two mismatch.

    # For INSDC coding sequences whose translation matches no UniProt sequence,
    # looks one forward link away in the graph for a node on a CDS that does
    # match a UniProt entry, and reports that entry's active-site annotations
    # that fall inside the step's range expressed in protein coordinates.
    # Prefixes are not declared here; presumably the endpoint predefines them.
    SELECT DISTINCT
      ?insdCDS
      #?insdCDSBegin ?insdCDSEnd
      ?step
      ?path
      ?node
      ?nextNode
      ?nextPath
      ?insdcStepBegin
      ?insdcStepEnd
      # NOTE(review): ?uniprotSequence is only bound inside the MINUS below, so
      # this column is always unbound in the results.
      ?uniprotSequence
      ?uniprot
      ?stepBeginInProteinSpace
      ?stepEndInProteinSpace
      ?annotationText
    {
      # Find CDS annotated by INSDC that do not match a UniProt protein.
      ?insdCDS insdcschema:translation ?sequence ;
               a insdcschema:Coding_Sequence ;
               faldo:location ?insdCDSLocation .
      MINUS {
        ?uniprotSequence rdf:value ?sequence .
      }

      # Get the range of this CDS and make sure the coordinates are on the
      # path we need later
      ?insdCDSLocation faldo:begin [ faldo:reference ?path ;
                                     faldo:position ?insdCDSBegin ] ;
                       faldo:end [ faldo:reference ?path ;
                                   faldo:position ?insdCDSEnd ] .
      ?step a vg:Step ;
            vg:path/skos:closeMatch ?path ;
            vg:node ?node ;
            faldo:begin [ faldo:reference/skos:closeMatch ?path ;
                          faldo:position ?insdcStepBegin ] ;
            faldo:end [ faldo:reference/skos:closeMatch ?path ;
                        faldo:position ?insdcStepEnd ] .

      ## I always forget how to interval ranges :(
      # The first two disjuncts can only hold when begin <= end; the last two
      # can only hold when end <= begin.
      FILTER (
        (?insdcStepBegin >= ?insdCDSBegin && ?insdcStepBegin <= ?insdCDSEnd)
        || (?insdCDSBegin >= ?insdcStepBegin && ?insdCDSBegin <= ?insdcStepEnd)
        || (?insdcStepEnd >= ?insdCDSEnd && ?insdcStepEnd <= ?insdCDSBegin)
        || (?insdCDSEnd >= ?insdcStepEnd && ?insdCDSEnd <= ?insdcStepBegin)
      )

      ## Then we look for a node close to the ones in the CDS in genome graph space (one step)
      ?node vg:linksForwardToForward ?nextNode .
      ?step2 a vg:Step ;
             vg:path/skos:closeMatch ?nextPath ;
             vg:node ?nextNode .

      ## Where that node is on a uniprot matching sequence
      ?nextinsdCDS insdcschema:translation ?nextSequence ;
                   a insdcschema:Coding_Sequence ;
                   faldo:location/faldo:begin/faldo:reference ?nextPath .
      ?uniprot up:sequence/rdf:value ?nextSequence .

      # Convert the step range to protein coordinates (nucleotide offset / 3).
      # NOTE(review): the IF branches look inconsistent - one branch of each
      # yields an absolute genome coordinate (?insdCDSBegin, ?insdcStepEnd)
      # rather than an offset from ?insdCDSBegin, and the end offset is computed
      # as ?insdCDSBegin - ?insdcStepEnd. Left unchanged; confirm intended maths.
      BIND(IF(?insdCDSBegin > ?insdcStepBegin, ?insdCDSBegin, ?insdcStepBegin - ?insdCDSBegin)/3 AS ?stepBeginInProteinSpace)
      BIND(IF(?insdCDSEnd > ?insdcStepEnd, ?insdcStepEnd, ?insdCDSBegin - ?insdcStepEnd)/3 AS ?stepEndInProteinSpace)

      # Active-site annotations of the matching UniProt entry, with their range.
      ?uniprot up:annotation ?annotation .
      ?annotation a up:Active_Site_Annotation .
      ?annotation up:range ?annotationRegion .
      ?annotation rdfs:comment ?annotationText .
      ?annotationRegion faldo:begin/faldo:position ?annotationBegin .
      ?annotationRegion faldo:end/faldo:position ?annotationEnd .

      # Keep annotations lying inside the step's protein-space range.
      FILTER (?annotationBegin >= ?stepBeginInProteinSpace && ?annotationEnd < ?stepEndInProteinSpace)
    }
  5. Polymorphisms located on ACE2 and TMPRSS2 and affecting proteins’ activity, structure, PTM...

    # For the two listed neXtProt entries, returns dbSNP-referenced variants
    # (with allele frequency) on the isoform flagged swissprotDisplayed,
    # together with positional annotations located at the variant position.
    # Prefixes (nextprot:, np:, rdfs:) are not declared here; presumably the
    # endpoint predefines them.
    SELECT DISTINCT
      (STR(?gn) AS ?genename)
      ?pos
      (STR(?snpac) AS ?varid)
      (STR(?orgaa) AS ?orgAA)
      ?annot_type
      (STR(?txt) AS ?note)
      (STR(?varaa) AS ?varAA)
      ?freq
    WHERE {
      # proteins of interest (ACE2, TMPRSS2)
      VALUES ?entry { nextprot:NX_Q9BYF1 nextprot:NX_O15393 }
      ?entry np:gene / np:name ?gn .
      ?entry np:isoform ?iso .

      # Variants on the displayed isoform: position, original and variant residue.
      ?iso np:swissprotDisplayed true ;
           np:variant ?var .
      ?var np:start ?pos ;
           np:original ?orgaa ;
           np:variation ?varaa .
      ?var np:evidence / np:allele-frequency ?freq .
      ?var np:evidence / np:reference ?xref .
      ?xref np:provenance <http://nextprot.org/rdf/db/dbSNP> ;
            np:accession ?snpac .

      # Positional annotations on the same isoform; the comment text is optional.
      ?iso np:positionalAnnotation ?annot .
      OPTIONAL { ?annot rdfs:comment ?txt . }
      ?annot a ?annot_type .

      {
        # Single-position annotations at the variant position, excluding
        # variants and sequence conflicts.
        ?annot np:start ?pos ;
               np:end ?pos .
        FILTER (NOT EXISTS { ?annot a np:Variant . })
        FILTER (NOT EXISTS { ?annot a np:SequenceConflict . })
      }
      UNION
      {
        # Disulfide bonds with either end at the variant position.
        ?annot a np:DisulfideBond .
        { ?annot np:start ?pos . }
        UNION
        { ?annot np:end ?pos . }
      }
    }
    ORDER BY ?entry ?pos