Skip to main content

Database Schema

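Zygotrix uses MongoDB as its primary database. This document describes the data models and collections. In the example documents below, `ObjectId`, `ISODate`, and `UUID` are type placeholders rather than literal values, and a trailing `// ...` comment lists the allowed values for that field.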

Collections Overview

| Collection | Purpose |
| --- | --- |
| users | User accounts and profiles |
| traits | Genetic traits database |
| conversations | AI chatbot conversations |
| gwas_datasets | Uploaded GWAS datasets |
| gwas_jobs | GWAS analysis jobs |
| gwas_results | GWAS analysis results |
| university_courses | Educational courses |
| university_enrollments | Course enrollments |

User Schema

{
"_id": ObjectId,
"email": "user@example.com",
"password_hash": "$2b$12$...",
"full_name": "John Doe",
"role": "user", // "user", "admin", "super_admin"
"created_at": ISODate,
"updated_at": ISODate,
"email_verified": true,
"preferences": {
"style": "conversational",
"length": "detailed",
"auto_learn": true
}
}

Trait Schema

{
"_id": ObjectId,
"name": "Eye Color",
"type": "simple_dominant",
"inheritance_pattern": "mendelian",
"alleles": [
{"symbol": "B", "name": "Brown", "dominant": true},
{"symbol": "b", "name": "Blue", "dominant": false}
],
"phenotypes": [
{"genotypes": ["BB", "Bb"], "name": "Brown eyes"},
{"genotypes": ["bb"], "name": "Blue eyes"}
],
"description": "Eye color determined by...",
"tags": ["human", "eye", "pigmentation"],
"created_at": ISODate
}

Conversation Schema

{
"_id": ObjectId,
"user_id": ObjectId,
"title": "Punnett Square Question",
"created_at": ISODate,
"updated_at": ISODate,
"messages": [
{
"id": UUID,
"role": "user",
"content": "What is a Punnett square?",
"timestamp": ISODate
},
{
"id": UUID,
"role": "assistant",
"content": "A Punnett square is...",
"timestamp": ISODate,
"tools_used": ["search_traits"]
}
],
"context": {
"last_trait": "eye_color",
"preferences": {}
}
}

GWAS Dataset Schema

{
"_id": ObjectId,
"user_id": ObjectId,
"name": "test.vcf",
"file_type": "vcf",
"status": "processed", // "pending", "processing", "processed", "error"
"storage_path": "gwas-datasets/user123/dataset456/raw/test.vcf",
"snp_count": 10000,
"sample_count": 500,
"created_at": ISODate,
"processed_at": ISODate,
"metadata": {
"chromosomes": [1, 2, 3, ...],
"build": "GRCh38"
}
}

GWAS Job Schema

{
"_id": ObjectId,
"user_id": ObjectId,
"dataset_id": ObjectId,
"status": "completed", // "pending", "running", "completed", "failed"
"analysis_type": "linear",
"parameters": {
"maf_threshold": 0.01,
"num_threads": 4
},
"created_at": ISODate,
"started_at": ISODate,
"completed_at": ISODate,
"error": null,
"result_id": ObjectId
}

Indexes

Users Collection

db.users.createIndex({ "email": 1 }, { unique: true })
db.users.createIndex({ "role": 1 })

Traits Collection

db.traits.createIndex({ "name": "text", "description": "text" })
db.traits.createIndex({ "type": 1 })
db.traits.createIndex({ "tags": 1 })

Conversations Collection

db.conversations.createIndex({ "user_id": 1, "updated_at": -1 })

GWAS Collections

db.gwas_datasets.createIndex({ "user_id": 1, "created_at": -1 })
db.gwas_jobs.createIndex({ "user_id": 1, "status": 1 })
db.gwas_jobs.createIndex({ "dataset_id": 1 })

Data Relationships

users
│
├──< conversations (1:many)
│    │
│    └── messages (embedded array)
│
├──< gwas_datasets (1:many)
│    │
│    └──< gwas_jobs (1:many)
│         │
│         └──> gwas_results (1:1)
│
└──< university_enrollments (1:many)
     │
     └──> university_courses (many:1)

Migration Scripts

Seed Traits

cd backend
python seed_mendelian_traits.py

Create Indexes

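// Run in MongoDB shell
// NOTE(review): this script creates only a subset of the indexes listed in the
// "Indexes" section: it omits users.role, traits.tags, conversations, and
// gwas_jobs.dataset_id, and indexes gwas_datasets on user_id alone rather than
// on (user_id, created_at). Confirm whether this is intentional.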
use zygotrix

// Users
db.users.createIndex({ "email": 1 }, { unique: true })

// Traits
db.traits.createIndex({ "name": "text", "description": "text" })
db.traits.createIndex({ "type": 1 })

// GWAS
db.gwas_datasets.createIndex({ "user_id": 1 })
db.gwas_jobs.createIndex({ "user_id": 1, "status": 1 })