import React from 'react'
import Typography from '@mui/material/Typography'
import styled from 'styled-components'
import TableImage from '@/assets/images/info/table.png'
import ChartImage from '@/assets/images/info/chart.png'

// Outer container of the info box; it only adds spacing below the content.
const ProjectNewsBoxWrapper = styled('div')`
  margin-bottom: 30px;
`
export default function ProjectNewsBox() {
    return (
        <ProjectNewsBoxWrapper>
            <Typography variant='h1'>Gen Seeker v1</Typography>
            <strong>
                Sample cohort description Secondary analysis and variant
                filtering Sample QC Variant frequency calculation
            </strong>
            <Typography variant='h2'>Sample cohort description</Typography>
            <Typography variant='body1' paragraph>
                The variant database of Gen Seeker v1 is composed of small
                variants from high- quality WGS data of 680 healthy donors from
                the Czech Republic that were sequenced as part of the ENIGMA
                project which aims to build the genome map of the Czech
                population.
            </Typography>
            <Typography variant='body1' paragraph>
                Sex of the samples was inferred from the data using the Ploidy
                Estimator run under the DRAGEN 3.10 DNA pipeline.
            </Typography>
            <Typography variant='body1' paragraph>
                In the v1 cohort, the composition of sex karyotypes is
                following:
            </Typography>
            <Typography variant='body1' paragraph>
                XY – 494 samples
            </Typography>
            <Typography variant='body1' paragraph>
                XX – 184 samples
            </Typography>
            <Typography variant='body1' paragraph>
                XXY – 1 sample
            </Typography>
            <Typography variant='body1' paragraph>
                XYY – 1 sample
            </Typography>
            <Typography variant='body1' paragraph>
                Ploidy Estimator tool estimates the sex karyotype of the sample
                using the ratios of the median sex chromosome coverages to the
                median autosomal coverage. The sex karyotype is determined based
                on the range the ratios fall in:
            </Typography>
            <div>
                <img src={TableImage} alt={''} />
            </div>
            <Typography variant='body1' paragraph>
                The plot bellow shows the normalized X vs. normalized Y
                coverages and the estimated ploidy of samples in the v1 cohort.
            </Typography>
            <div>
                <img src={ChartImage} alt={''} />
            </div>
            <Typography variant='h2'>
                Secondary analysis and variant filtering
            </Typography>
            <Typography variant='body1' paragraph>
                FASTQs were processed by the Illumina DRAGEN Bio-IT Platform
                v3.10 using the hg38 alt-masked graph reference. The pipeline
                was set to produce VCFs with small variants but also with CNVs,
                SVs and results for several other specialized DRAGEN callers all
                of which are not yet part of the v1 release which focuses only
                on small variants (SNVs and indels). Though variants from mtDNA
                have also been called, they are not yet part of the
                frequency-aggregated database available through czechgenome.cz,
                however may be included in future releases of Gen Seeker.
            </Typography>
            <Typography variant='body1' paragraph>
                Due to the nature of DRAGEN’s variant caller which employs
                probabilistic models, it has inbuilt capabilities to distinguish
                false calls from true calls. For this reason it’s not necessary
                to perform strict post-filtering of variants and only simple
                hard- filtering was performed. Variants that fulfilled the
                criteria bellow were filtered out:
            </Typography>
            <Typography variant='body1' paragraph>
                {'snp: QUAL < 10.41'}
            </Typography>
            <Typography variant='body1' paragraph>
                {'indel: QUAL < 7.83'}
            </Typography>
            <Typography variant='body1' paragraph>
                {'all: DP <= 1'}
            </Typography>
            <Typography variant='h2'>Sample QC</Typography>
            <Typography variant='body1' paragraph>
                {'The final set of 677 samples consists of samples that passed two quality criteria:\n' +
                    '          1. Het/Hom ratio <= 1.75\n' +
                    '          2. (PCT of genome with coverage 10x: inf)  > 90) OR (PCT of genome with\n' +
                    '          coverage 3x: inf > 95)'}
            </Typography>
            <Typography variant='h2'>Variant frequency calculation</Typography>
            <Typography variant='body1' paragraph>
                The variants of all individuals are stored in HAIL database that
                is runs on local institutional server. This database served as
                the basis for computation of aggregated database which consists
                of only anonymized unique variants for which the following
                information is stored:
            </Typography>
            <Typography variant='body1' paragraph></Typography>
            <Typography variant='body1' paragraph>
                Allele Count – Alternate allele calls in high-quality genotypes
            </Typography>
            <Typography variant='body1' paragraph>
                Allele Frequency – Alternate allele frequency in high quality
                genotypes
            </Typography>
            <Typography variant='body1' paragraph>
                Allele Frequency XX - Alternate allele frequency in high quality
                genotypes of individuals with XX karyotype
            </Typography>
            <Typography variant='body1' paragraph>
                Allele Frequency XY - Alternate allele frequency in high quality
                genotypes of individuals with XY karyotype
            </Typography>
            <Typography variant='body1' paragraph>
                Mean coverage at the alternate allele’s locus
            </Typography>
            <Typography variant='body1' paragraph>
                Homozygote Count – Number of individuals homozygous for the
                alternate allele
            </Typography>
            <Typography variant='body1' paragraph>
                The information above is presented for each variant in the
                Variant detail page though the Gen Seeker’s interface.
            </Typography>
            <Typography variant='body1' paragraph>
                The allele frequency was computed using the bellow formula:
            </Typography>
            <code>
                freq(a) =(sum(samples_with_geno_aa x 2) +
                sum(samples_with_geno_Aa)) / (samples x 2)
            </code>
        </ProjectNewsBoxWrapper>
    )
}
