Provide a comprehensive cross-database identifier and functional mapping reference for human ARID1A. This should serve as a definitive lookup resource for researchers. ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 1: GENE IDENTIFIERS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Provide ALL gene-level database identifiers: - HGNC ID and approved symbol - Ensembl gene ID (ENSG) - NCBI Entrez Gene ID - OMIM gene/locus ID - Genomic location: chromosome, start position, end position, strand ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 2: TRANSCRIPT IDENTIFIERS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ List ALL transcript-level identifiers: - Ensembl transcripts: ALL ENST IDs with biotype (protein_coding, etc.) How many total transcripts? - RefSeq transcripts: ALL NM_ mRNA accessions Mark which is MANE Select (canonical clinical standard) - CCDS IDs: ALL consensus coding sequence identifiers For the CANONICAL/MANE SELECT transcript: - List ALL exon IDs (ENSE) with genomic coordinates - Total exon count ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 3: PROTEIN IDENTIFIERS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ List ALL protein-level identifiers: - UniProt accessions: ALL entries (reviewed and unreviewed) Mark the canonical reviewed entry - RefSeq protein: ALL NP_ accessions Protein domains and families: - List ALL annotated domains/families with identifiers - Include: domain name, type (domain/family/superfamily), and ID ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 4: STRUCTURE IDENTIFIERS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Experimental structures: - List ALL PDB structure IDs - For each: experimental method (X-ray, NMR, Cryo-EM) and resolution - Total PDB structure count Predicted 
structures: - AlphaFold model ID and confidence metrics (pLDDT) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 5: CROSS-SPECIES ORTHOLOGS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ List orthologous genes in key model organisms (where available): - Mouse (Mus musculus): gene ID, symbol - Rat (Rattus norvegicus): gene ID, symbol - Zebrafish (Danio rerio): gene ID, symbol - Fruit fly (Drosophila melanogaster): gene ID, symbol - Worm (C. elegans): gene ID, symbol - Yeast (S. cerevisiae): gene ID, symbol ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 6: CLINICAL VARIANTS & AI PREDICTIONS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Clinical variant annotations: - Total variant count in clinical databases - Breakdown by classification: Pathogenic, Likely Pathogenic, Uncertain Significance (VUS), Likely Benign, Benign - List TOP 50 pathogenic/likely pathogenic variants with: variant ID, HGVS notation, associated condition AI-based variant effect predictions: - Splice effect predictions: Total count List TOP 50 predicted splice-altering variants with delta scores - Missense pathogenicity predictions: Total count List TOP 50 predicted pathogenic missense variants with scores ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 7: BIOLOGICAL PATHWAYS & GENE ONTOLOGY ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Pathway membership: - List ALL biological pathways this gene participates in - Include pathway IDs and names - Total pathway count Gene Ontology annotations: - Biological Process: count and TOP 20 terms with IDs - Molecular Function: count and TOP 20 terms with IDs - Cellular Component: count and TOP 20 terms with IDs ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 8: PROTEIN INTERACTIONS & MOLECULAR NETWORKS 
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Protein-protein interactions: - Total interaction count - List TOP 50 highest-confidence interacting proteins with scores Protein similarity (evolutionary and structural): - Structural/embedding similarity: How many similar proteins? List TOP 20 with similarity scores - Sequence homology: How many homologous proteins? List TOP 20 with identity/similarity scores ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 9: TRANSCRIPTION FACTOR REGULATORY DATA ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ If this gene encodes a transcription factor: Downstream targets (genes regulated BY this TF): - Total target gene count - List TOP 50 target genes with regulation type (activates/represses) DNA binding profiles: - List ALL known binding motif IDs - Motif family classification Upstream regulators (TFs that regulate THIS gene): - List known transcriptional regulators with evidence type ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 10: DRUG & PHARMACOLOGY DATA ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ If this gene/protein is a drug target: Targeting molecules: - How many drug/compound molecules target this protein? - List TOP 30 molecules by development phase - Include: molecule ID, name, mechanism, highest development phase Clinical trials: - How many clinical trials involve drugs targeting this gene? - List TOP 20 trials with: trial ID, phase, status, intervention Pharmacogenomics: - Known drug-gene interactions affecting drug response - Dosing guidelines if any exist ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 11: EXPRESSION PROFILES ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Tissue expression: - Which tissues express this gene most highly? 
- List TOP 30 tissues with expression scores/levels - Note any tissue-specific or tissue-enriched patterns Cell type expression: - Which cell types show highest expression? - List TOP 30 cell types with expression scores - Note any cell type-specific patterns Single-cell expression data (if available): - Which single-cell datasets/experiments include this gene? - Notable cell population patterns ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ SECTION 12: DISEASE ASSOCIATIONS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ Mendelian/monogenic disease links: - What diseases are caused by mutations in this gene? - List ALL associated diseases with: disease name, disease ID, inheritance pattern, evidence level Phenotype associations: - What clinical phenotypes are associated with this gene? - List TOP 50 phenotype terms with IDs Complex trait associations (GWAS): - What traits/diseases are linked via genome-wide association studies? - List TOP 30 GWAS associations with: trait, study ID, p-value, effect size if available ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ FORMATTING REQUIREMENTS ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - Present each section with clear headers - Use tables for listing multiple items - Always include COUNTS for every category - For lists with >50 items: show total count + TOP 50 - For lists with <50 items: show ALL items - Always include database identifiers (IDs), not just names - Mark canonical/primary/reviewed entries where applicable - If a section has no data available, state "No data available" rather than omitting the section

Question

Provide a comprehensive cross-database identifier and functional mapping
reference for human ARID1A. This should serve as a definitive
lookup resource for researchers.

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 1: GENE IDENTIFIERS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Provide ALL gene-level database identifiers:
- HGNC ID and approved symbol
- Ensembl gene ID (ENSG)
- NCBI Entrez Gene ID
- OMIM gene/locus ID
- Genomic location: chromosome, start position, end position, strand

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 2: TRANSCRIPT IDENTIFIERS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
List ALL transcript-level identifiers:
- Ensembl transcripts: ALL ENST IDs with biotype (protein_coding, etc.)
  How many total transcripts?
- RefSeq transcripts: ALL NM_ mRNA accessions
  Mark which is MANE Select (canonical clinical standard)
- CCDS IDs: ALL consensus coding sequence identifiers

For the CANONICAL/MANE SELECT transcript:
- List ALL exon IDs (ENSE) with genomic coordinates
- Total exon count

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 3: PROTEIN IDENTIFIERS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
List ALL protein-level identifiers:
- UniProt accessions: ALL entries (reviewed and unreviewed)
  Mark the canonical reviewed entry
- RefSeq protein: ALL NP_ accessions

Protein domains and families:
- List ALL annotated domains/families with identifiers
- Include: domain name, type (domain/family/superfamily), and ID

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 4: STRUCTURE IDENTIFIERS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Experimental structures:
- List ALL PDB structure IDs
- For each: experimental method (X-ray, NMR, Cryo-EM) and resolution
- Total PDB structure count

Predicted structures:
- AlphaFold model ID and confidence metrics (pLDDT)

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 5: CROSS-SPECIES ORTHOLOGS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
List orthologous genes in key model organisms (where available):
- Mouse (Mus musculus): gene ID, symbol
- Rat (Rattus norvegicus): gene ID, symbol
- Zebrafish (Danio rerio): gene ID, symbol
- Fruit fly (Drosophila melanogaster): gene ID, symbol
- Worm (C. elegans): gene ID, symbol
- Yeast (S. cerevisiae): gene ID, symbol

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 6: CLINICAL VARIANTS & AI PREDICTIONS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Clinical variant annotations:
- Total variant count in clinical databases
- Breakdown by classification: Pathogenic, Likely Pathogenic,
  Uncertain Significance (VUS), Likely Benign, Benign
- List TOP 50 pathogenic/likely pathogenic variants with:
  variant ID, HGVS notation, associated condition

AI-based variant effect predictions:
- Splice effect predictions: Total count
  List TOP 50 predicted splice-altering variants with delta scores
- Missense pathogenicity predictions: Total count
  List TOP 50 predicted pathogenic missense variants with scores

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 7: BIOLOGICAL PATHWAYS & GENE ONTOLOGY
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Pathway membership:
- List ALL biological pathways this gene participates in
- Include pathway IDs and names
- Total pathway count

Gene Ontology annotations:
- Biological Process: count and TOP 20 terms with IDs
- Molecular Function: count and TOP 20 terms with IDs
- Cellular Component: count and TOP 20 terms with IDs

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 8: PROTEIN INTERACTIONS & MOLECULAR NETWORKS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Protein-protein interactions:
- Total interaction count
- List TOP 50 highest-confidence interacting proteins with scores

Protein similarity (evolutionary and structural):
- Structural/embedding similarity: How many similar proteins?
  List TOP 20 with similarity scores
- Sequence homology: How many homologous proteins?
  List TOP 20 with identity/similarity scores

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 9: TRANSCRIPTION FACTOR REGULATORY DATA
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
If this gene encodes a transcription factor:

Downstream targets (genes regulated BY this TF):
- Total target gene count
- List TOP 50 target genes with regulation type (activates/represses)

DNA binding profiles:
- List ALL known binding motif IDs
- Motif family classification

Upstream regulators (TFs that regulate THIS gene):
- List known transcriptional regulators with evidence type

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 10: DRUG & PHARMACOLOGY DATA
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
If this gene/protein is a drug target:

Targeting molecules:
- How many drug/compound molecules target this protein?
- List TOP 30 molecules by development phase
- Include: molecule ID, name, mechanism, highest development phase

Clinical trials:
- How many clinical trials involve drugs targeting this gene?
- List TOP 20 trials with: trial ID, phase, status, intervention

Pharmacogenomics:
- Known drug-gene interactions affecting drug response
- Dosing guidelines if any exist

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 11: EXPRESSION PROFILES
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Tissue expression:
- Which tissues express this gene most highly?
- List TOP 30 tissues with expression scores/levels
- Note any tissue-specific or tissue-enriched patterns

Cell type expression:
- Which cell types show highest expression?
- List TOP 30 cell types with expression scores
- Note any cell type-specific patterns

Single-cell expression data (if available):
- Which single-cell datasets/experiments include this gene?
- Notable cell population patterns

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
SECTION 12: DISEASE ASSOCIATIONS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Mendelian/monogenic disease links:
- What diseases are caused by mutations in this gene?
- List ALL associated diseases with: disease name, disease ID,
  inheritance pattern, evidence level

Phenotype associations:
- What clinical phenotypes are associated with this gene?
- List TOP 50 phenotype terms with IDs

Complex trait associations (GWAS):
- What traits/diseases are linked via genome-wide association studies?
- List TOP 30 GWAS associations with: trait, study ID, p-value,
  effect size if available

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
FORMATTING REQUIREMENTS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
- Present each section with clear headers
- Use tables for listing multiple items
- Always include COUNTS for every category
- For lists with >50 items: show total count + TOP 50
- For lists with <50 items: show ALL items
- Always include database identifiers (IDs), not just names
- Mark canonical/primary/reviewed entries where applicable
- If a section has no data available, state "No data available"
  rather than omitting the section

Accepted Answer

Section 1: Gene Identifiers

Primary Gene Identifiers
Database	Identifier	Description
HGNC	HGNC:11110	Approved symbol: ARID1A
Ensembl	ENSG00000117713	Human gene
NCBI Entrez	8289	Gene ID
OMIM	603024	Gene/locus MIM

Gene Names and Aliases
Category	Value
Approved Name	AT-rich interaction domain 1A
Locus Type	Protein-coding gene
Status	Approved
Previous Symbols	C1orf4, SMARCF1
Aliases	B120, P270, BAF250, BAF250a, OSA1, hELD, hOSA1, BM029, CSS2, ELD, MRD14

Genomic Location
Attribute	Value
Chromosome	1
Cytogenetic …

Attribute	Value
Chromosome	1
Cytogenetic Band	1p36.11
Start Position	26,693,236 bp (GRCh38)
End Position	26,782,104 bp (GRCh38)
Strand	+ (positive/forward)
Gene Length	88,869 bp

Transcript ID	Biotype	Start	End	Note
ENST00000324856	protein_coding	26,696,015	26,782,104	Canonical
ENST00000850904	protein_coding	26,696,015	26,782,104
ENST00000374152	protein_coding	26,728,912	26,780,756
ENST00000430799	protein_coding	26,693,236	26,781,177
ENST00000457599	protein_coding	26,695,164	26,780,817
ENST00000524572	protein_coding	26,727,708	26,731,318
ENST00000636219	protein_coding	26,729,657	26,782,102
ENST00000637465	protein_coding	26,696,032	26,731,568
ENST00000466382	nonsense_mediated_decay	26,772,964	26,781,180
ENST00000532781	nonsense_mediated_decay	26,774,870	26,780,983
ENST00000636794	nonsense_mediated_decay	26,772,967	26,775,707
ENST00000430291	retained_intron	26,766,270	26,771,191
ENST00000636072	retained_intron	26,772,963	26,774,962
ENST00000636110	retained_intron	26,769,174	26,772,623
ENST00000636422	retained_intron	26,764,265	26,765,878
ENST00000637788	retained_intron	26,778,099	26,781,144
ENST00000636958	protein_coding_CDS_not_defined	26,752,952	26,765,080

ARID1A Gene Complete Identifier and Functional Mapping Reference

Structured Data Sources

Database	Identifier	Description
HGNC	HGNC:11110	Approved symbol: ARID1A
Ensembl	ENSG00000117713	Human gene
NCBI Entrez	8289	Gene ID
OMIM	603024	Gene/locus MIM

Category	Value
Approved Name	AT-rich interaction domain 1A
Locus Type	Protein-coding gene
Status	Approved
Previous Symbols	C1orf4, SMARCF1
Aliases	B120, P270, BAF250, BAF250a, OSA1, hELD, hOSA1, BM029, CSS2, ELD, MRD14

Accession	Type	Status	MANE Select
NM_006015	mRNA	REVIEWED	✓ Yes
NM_139135	mRNA	REVIEWED	No
NM_001080819	mRNA	VALIDATED	No
NM_001341479	mRNA	REVIEWED	No
NM_001363070	mRNA	VALIDATED	No
NM_001401271	mRNA	VALIDATED	No
NM_001401273	mRNA	VALIDATED	No
NM_001401275	mRNA	VALIDATED	No
NM_001401276	mRNA	VALIDATED	No
NM_001401278	mRNA	VALIDATED	No
NM_001401279	mRNA	VALIDATED	No