Provide a comprehensive cross-database identifier and functional mapping reference for human GBA. This should serve as a definitive lookup resource for researchers. ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 1: GENE IDENTIFIERS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Provide ALL gene-level database identifiers: - HGNC ID and approved symbol - Ensembl gene ID (ENSG) - NCBI Entrez Gene ID - OMIM gene/locus ID - Genomic location: chromosome, start position, end position, strand ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 2: TRANSCRIPT IDENTIFIERS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ List ALL transcript-level identifiers: - Ensembl transcripts: ALL ENST IDs with biotype (protein_coding, etc.) How many total transcripts? - RefSeq transcripts: ALL NM_ mRNA accessions Mark which is MANE Select (canonical clinical standard) - CCDS IDs: ALL consensus coding sequence identifiers For the CANONICAL/MANE SELECT transcript: - List ALL exon IDs (ENSE) with genomic coordinates - Total exon count ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 3: PROTEIN IDENTIFIERS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ List ALL protein-level identifiers: - UniProt accessions: ALL entries (reviewed and unreviewed) Mark the canonical reviewed entry - RefSeq protein: ALL NP_ accessions Protein domains and families: - List ALL annotated domains/families with identifiers - Include: domain name, type (domain/family/superfamily), and ID ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 4: STRUCTURE IDENTIFIERS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Experimental structures: - List ALL PDB structure IDs - For each: experimental method (X-ray, NMR, Cryo-EM) and resolution - Total PDB structure count Predicted 
structures: - AlphaFold model ID and confidence metrics (pLDDT) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 5: CROSS-SPECIES ORTHOLOGS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ List orthologous genes in key model organisms (where available): - Mouse (Mus musculus): gene ID, symbol - Rat (Rattus norvegicus): gene ID, symbol - Zebrafish (Danio rerio): gene ID, symbol - Fruit fly (Drosophila melanogaster): gene ID, symbol - Worm (C. elegans): gene ID, symbol - Yeast (S. cerevisiae): gene ID, symbol ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 6: CLINICAL VARIANTS & AI PREDICTIONS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Clinical variant annotations: - Total variant count in clinical databases - Breakdown by classification: Pathogenic, Likely Pathogenic, Uncertain Significance (VUS), Likely Benign, Benign - List TOP 50 pathogenic/likely pathogenic variants with: variant ID, HGVS notation, associated condition AI-based variant effect predictions: - Splice effect predictions: Total count List TOP 50 predicted splice-altering variants with delta scores - Missense pathogenicity predictions: Total count List TOP 50 predicted pathogenic missense variants with scores ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 7: BIOLOGICAL PATHWAYS & GENE ONTOLOGY ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Pathway membership: - List ALL biological pathways this gene participates in - Include pathway IDs and names - Total pathway count Gene Ontology annotations: - Biological Process: count and TOP 20 terms with IDs - Molecular Function: count and TOP 20 terms with IDs - Cellular Component: count and TOP 20 terms with IDs ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 8: PROTEIN INTERACTIONS & MOLECULAR NETWORKS 
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Protein-protein interactions: - Total interaction count - List TOP 50 highest-confidence interacting proteins with scores Protein similarity (evolutionary and structural): - Structural/embedding similarity: How many similar proteins? List TOP 20 with similarity scores - Sequence homology: How many homologous proteins? List TOP 20 with identity/similarity scores ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 9: TRANSCRIPTION FACTOR REGULATORY DATA ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ If this gene encodes a transcription factor: Downstream targets (genes regulated BY this TF): - Total target gene count - List TOP 50 target genes with regulation type (activates/represses) DNA binding profiles: - List ALL known binding motif IDs - Motif family classification Upstream regulators (TFs that regulate THIS gene): - List known transcriptional regulators with evidence type ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 10: DRUG & PHARMACOLOGY DATA ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ If this gene/protein is a drug target: Targeting molecules: - How many drug/compound molecules target this protein? - List TOP 30 molecules by development phase - Include: molecule ID, name, mechanism, highest development phase Clinical trials: - How many clinical trials involve drugs targeting this gene? - List TOP 20 trials with: trial ID, phase, status, intervention Pharmacogenomics: - Known drug-gene interactions affecting drug response - Dosing guidelines if any exist ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 11: EXPRESSION PROFILES ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Tissue expression: - Which tissues express this gene most highly? 
- List TOP 30 tissues with expression scores/levels - Note any tissue-specific or tissue-enriched patterns Cell type expression: - Which cell types show highest expression? - List TOP 30 cell types with expression scores - Note any cell type-specific patterns Single-cell expression data (if available): - Which single-cell datasets/experiments include this gene? - Notable cell population patterns ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 12: DISEASE ASSOCIATIONS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Mendelian/monogenic disease links: - What diseases are caused by mutations in this gene? - List ALL associated diseases with: disease name, disease ID, inheritance pattern, evidence level Phenotype associations: - What clinical phenotypes are associated with this gene? - List TOP 50 phenotype terms with IDs Complex trait associations (GWAS): - What traits/diseases are linked via genome-wide association studies? - List TOP 30 GWAS associations with: trait, study ID, p-value, effect size if available ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ FORMATTING REQUIREMENTS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - Present each section with clear headers - Use tables for listing multiple items - Always include COUNTS for every category - For lists with >50 items: show total count + TOP 50 - For lists with …

Question

Provide a comprehensive cross-database identifier and functional mapping
reference for human GBA. This should serve as a definitive
lookup resource for researchers.

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 1: GENE IDENTIFIERS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Provide ALL gene-level database identifiers:
- HGNC ID and approved symbol
- Ensembl gene ID (ENSG)
- NCBI Entrez Gene ID
- OMIM gene/locus ID
- Genomic location: chromosome, start position, end position, strand

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 2: TRANSCRIPT IDENTIFIERS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
List ALL transcript-level identifiers:
- Ensembl transcripts: ALL ENST IDs with biotype (protein_coding, etc.)
  How many total transcripts?
- RefSeq transcripts: ALL NM_ mRNA accessions
  Mark which is MANE Select (canonical clinical standard)
- CCDS IDs: ALL consensus coding sequence identifiers

For the CANONICAL/MANE SELECT transcript:
- List ALL exon IDs (ENSE) with genomic coordinates
- Total exon count

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 3: PROTEIN IDENTIFIERS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
List ALL protein-level identifiers:
- UniProt accessions: ALL entries (reviewed and unreviewed)
  Mark the canonical reviewed entry
- RefSeq protein: ALL NP_ accessions

Protein domains and families:
- List ALL annotated domains/families with identifiers
- Include: domain name, type (domain/family/superfamily), and ID

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 4: STRUCTURE IDENTIFIERS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Experimental structures:
- List ALL PDB structure IDs
- For each: experimental method (X-ray, NMR, Cryo-EM) and resolution
- Total PDB structure count

Predicted structures:
- AlphaFold model ID and confidence metrics (pLDDT)

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 5: CROSS-SPECIES ORTHOLOGS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
List orthologous genes in key model organisms (where available):
- Mouse (Mus musculus): gene ID, symbol
- Rat (Rattus norvegicus): gene ID, symbol
- Zebrafish (Danio rerio): gene ID, symbol
- Fruit fly (Drosophila melanogaster): gene ID, symbol
- Worm (C. elegans): gene ID, symbol
- Yeast (S. cerevisiae): gene ID, symbol

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 6: CLINICAL VARIANTS & AI PREDICTIONS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Clinical variant annotations:
- Total variant count in clinical databases
- Breakdown by classification: Pathogenic, Likely Pathogenic,
  Uncertain Significance (VUS), Likely Benign, Benign
- List TOP 50 pathogenic/likely pathogenic variants with:
  variant ID, HGVS notation, associated condition

AI-based variant effect predictions:
- Splice effect predictions: Total count
  List TOP 50 predicted splice-altering variants with delta scores
- Missense pathogenicity predictions: Total count
  List TOP 50 predicted pathogenic missense variants with scores

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 7: BIOLOGICAL PATHWAYS & GENE ONTOLOGY
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Pathway membership:
- List ALL biological pathways this gene participates in
- Include pathway IDs and names
- Total pathway count

Gene Ontology annotations:
- Biological Process: count and TOP 20 terms with IDs
- Molecular Function: count and TOP 20 terms with IDs
- Cellular Component: count and TOP 20 terms with IDs

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 8: PROTEIN INTERACTIONS & MOLECULAR NETWORKS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Protein-protein interactions:
- Total interaction count
- List TOP 50 highest-confidence interacting proteins with scores

Protein similarity (evolutionary and structural):
- Structural/embedding similarity: How many similar proteins?
  List TOP 20 with similarity scores
- Sequence homology: How many homologous proteins?
  List TOP 20 with identity/similarity scores

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 9: TRANSCRIPTION FACTOR REGULATORY DATA
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
If this gene encodes a transcription factor:

Downstream targets (genes regulated BY this TF):
- Total target gene count
- List TOP 50 target genes with regulation type (activates/represses)

DNA binding profiles:
- List ALL known binding motif IDs
- Motif family classification

Upstream regulators (TFs that regulate THIS gene; report these whether or not the gene itself encodes a TF):
- List known transcriptional regulators with evidence type

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 10: DRUG & PHARMACOLOGY DATA
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
If this gene/protein is a drug target:

Targeting molecules:
- How many drug/compound molecules target this protein?
- List TOP 30 molecules by development phase
- Include: molecule ID, name, mechanism, highest development phase

Clinical trials:
- How many clinical trials involve drugs targeting this gene?
- List TOP 20 trials with: trial ID, phase, status, intervention

Pharmacogenomics:
- Known drug-gene interactions affecting drug response
- Dosing guidelines if any exist

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 11: EXPRESSION PROFILES
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Tissue expression:
- Which tissues express this gene most highly?
- List TOP 30 tissues with expression scores/levels
- Note any tissue-specific or tissue-enriched patterns

Cell type expression:
- Which cell types show highest expression?
- List TOP 30 cell types with expression scores
- Note any cell type-specific patterns

Single-cell expression data (if available):
- Which single-cell datasets/experiments include this gene?
- Notable cell population patterns

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 12: DISEASE ASSOCIATIONS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Mendelian/monogenic disease links:
- What diseases are caused by mutations in this gene?
- List ALL associated diseases with: disease name, disease ID,
  inheritance pattern, evidence level

Phenotype associations:
- What clinical phenotypes are associated with this gene?
- List TOP 50 phenotype terms with IDs

Complex trait associations (GWAS):
- What traits/diseases are linked via genome-wide association studies?
- List TOP 30 GWAS associations with: trait, study ID, p-value,
  effect size if available

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
FORMATTING REQUIREMENTS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
- Present each section with clear headers
- Use tables for listing multiple items
- Always include COUNTS for every category
- For lists with >50 items: show total count + TOP 50
- For lists with ≤50 items: show ALL items
- Always include database identifiers (IDs), not just names
- Mark canonical/primary/reviewed entries where applicable
- If a section has no data available, state "No data available"
  rather than omitting the section

Accepted Answer

Database	Identifier	Notes
HGNC ID	HGNC:4177	Approved
Approved Symbol	GBA1
Previous Symbols	GBA, GLUC
Approved Name	glucosylceramidase beta 1
Ensembl Gene	ENSG00000177628
NCBI Entrez Gene	2629
OMIM Gene/Locus	606463
Locus Group	protein-coding gene

Genomic Location (GRCh38)

Attribute	Value
Chromosome	1
Cytogenetic Band	1q22
Start Position	155,234,447
End Position	155,245,178
Strand	Minus (-)
Gene Span	10,731 bp

Section 2: Transcript Identifiers

Ensembl Transcripts

Total Transcripts: 27
Transcript …

Transcript ID	Biotype	Start	End	Notes
ENST00000327247	protein_coding	155,234,452	155,244,670	Canonical
ENST00000368373	protein_coding	155,234,452	155,241,249
ENST00000427500	protein_coding	155,234,559	155,241,275
ENST00000428024	protein_coding	155,234,868	155,244,699
ENST00000852359	protein_coding	155,234,453	155,245,178
ENST00000852360	protein_coding	155,234,447	155,244,699
ENST00000852361	protein_coding	155,234,453	155,244,690
ENST00000852362	protein_coding	155,234,449	155,244,678
ENST00000852363	protein_coding	155,234,452	155,244,677
ENST00000852364	protein_coding	155,234,453	155,244,673
ENST00000852365	protein_coding	155,234,453	155,244,665
ENST00000852366	protein_coding	155,234,452	155,244,379
ENST00000852367	protein_coding	155,234,452	155,241,226
ENST00000852368	protein_coding	155,234,933	155,241,262
ENST00000948996	protein_coding	155,234,453	155,244,673
ENST00000948997	protein_coding	155,234,453	155,244,629
ENST00000948998	protein_coding	155,234,453	155,241,263
ENST00000460156	protein_coding_CDS_not_defined	155,237,932	155,238,891
ENST00000464536	protein_coding_CDS_not_defined	155,234,453	155,235,835
ENST00000467918	protein_coding_CDS_not_defined	155,239,635	155,244,606
ENST00000470104	protein_coding_CDS_not_defined	155,239,960	155,241,243
ENST00000473570	protein_coding_CDS_not_defined	155,238,564	155,244,622
ENST00000478472	protein_coding_CDS_not_defined	155,234,961	155,236,459
ENST00000484489	protein_coding_CDS_not_defined	155,235,016	155,241,231
ENST00000491081	protein_coding_CDS_not_defined	155,236,182	155,238,709
ENST00000493842	protein_coding_CDS_not_defined	155,238,213	155,244,636
ENST00000497670	protein_coding_CDS_not_defined	155,236,396	155,239,962

RefSeq ID	Type	Status	MANE Select
NM_000157	mRNA	REVIEWED	✓ YES
NM_001005741	mRNA	REVIEWED	No
NM_001005742	mRNA	REVIEWED	No
NM_001171811	mRNA	REVIEWED	No
NM_001171812	mRNA	REVIEWED	No

RefSeq ID	Type	Status	MANE Select
NP_000148	protein	REVIEWED	✓ YES
NP_001005741	protein	REVIEWED	No
NP_001005742	protein	REVIEWED	No
NP_001165282	protein	REVIEWED	No
NP_001165283	protein	REVIEWED	No

GBA Gene Complete Identifier and Functional Mapping Reference

Structured Data Sources

Database	Identifier	Notes
HGNC ID	HGNC:4177	Approved
Approved Symbol	GBA1
Previous Symbols	GBA, GLUC
Approved Name	glucosylceramidase beta 1
Ensembl Gene	ENSG00000177628
NCBI Entrez Gene	2629
OMIM Gene/Locus	606463
Locus Group	protein-coding gene

Genomic Location (GRCh38)

Attribute	Value
Chromosome	1
Cytogenetic Band	1q22
Start Position	155,234,447
End Position	155,245,178
Strand	Minus (-)
Gene Span	10,731 bp (end − start; 10,732 bp when both endpoint bases are counted)