import dnaio
import click
import re
import os
import json

# Fixed sequence searched for in R1; the kept R1 fragment starts right after it.
_R1_ANCHOR = 'CGTCCGTCGTTGCTCGTAGATGTGTATAAGAGACAG'
# R1 reads shorter than this are dropped together with their mate.
_R1_MIN_LEN = 50
# Start offset used for R1 when the anchor sequence is not found in the read.
# NOTE(review): presumably the expected anchor end position -- confirm.
_R1_FALLBACK_START = 43
# Maximum number of bases kept from each mate.
_KEEP_LEN = 50


def _r1_keep_start(r1seq):
    """Return the offset in *r1seq* at which the kept fragment begins."""
    hit = r1seq.find(_R1_ANCHOR)
    if hit == -1:
        return _R1_FALLBACK_START
    return hit + len(_R1_ANCHOR)


@click.command(context_settings=dict(help_option_names=['-h', '--help']))
@click.option('--afq1', required=True,  help='input atac step1 fq1.')
@click.option('--afq2', required=True,  help='input atac step1 fq2.')
@click.option('--step1json', required=True,  help='input atac step1  summary json.')
@click.option('--outdir', default='./', show_default=True, help='output prefix.')
@click.option('--samplename', required=True,  help='samplename.')
def cutreads(afq1, afq2, step1json, outdir, samplename):
    """Trim paired reads to at most 50 bases and update the step1 summary JSON.

    \f
    The form feed above makes click stop the ``--help`` text at the summary.

    For every read pair, R1 is cut to the (up to) 50 bases that follow the
    anchor sequence, or to positions 43-93 when the anchor is absent; R2 is
    cut to its first 50 bases. Pairs whose R1 is shorter than 50 bases are
    counted but not written.

    Args:
        afq1: Path of the input R1 FASTQ file.
        afq2: Path of the input R2 FASTQ file.
        step1json: Path of the summary JSON; it is read, the keys
            ``stat.step1_readspair`` and ``stat.step1_available`` are set,
            and the file is rewritten in place.
        outdir: Directory receiving ``<samplename>_cutR1.fastq.gz`` and
            ``<samplename>_cutR2.fastq.gz``; created if missing.
        samplename: Prefix of the output file names.

    Raises:
        KeyError: If the summary JSON has no ``"stat"`` entry. The file is
            left untouched in that case.
    """
    total = 0
    available = 0
    if outdir:
        # dnaio.open does not create parent directories for its outputs.
        os.makedirs(outdir, exist_ok=True)
    outafq1 = os.path.join(outdir, samplename+"_cutR1.fastq.gz")
    outafq2 = os.path.join(outdir, samplename+"_cutR2.fastq.gz")

    with dnaio.open(file1=afq1, file2=afq2, mode='r') as fh, \
            dnaio.open(file1=outafq1, file2=outafq2, mode='w') as fhout:
        for r1, r2 in fh:
            total += 1

            r1seq = r1.sequence
            if len(r1seq) < _R1_MIN_LEN:
                continue

            # Slicing clamps at the end of the read, so reads with fewer than
            # 50 bases after the start offset are simply kept shorter.
            # NOTE(review): an anchor ending exactly at the read end yields an
            # empty R1 record -- confirm downstream tools accept that.
            start = _r1_keep_start(r1seq)
            stop = start + _KEEP_LEN
            re1 = dnaio.Sequence(
                name=r1.name,
                sequence=r1seq[start:stop],
                qualities=r1.qualities[start:stop],
            )
            re2 = dnaio.Sequence(
                name=r2.name,
                sequence=r2.sequence[:_KEEP_LEN],
                qualities=r2.qualities[:_KEEP_LEN],
            )
            available += 1
            fhout.write(re1, re2)

    print(f'step1 total reads pair number : {total}')
    print(f'step1 available reads pair number : {available}')

    with open(step1json, "r") as fh:
        step1_summary = json.load(fh)

    # Update the in-memory summary BEFORE reopening the file for writing, so a
    # missing "stat" entry cannot leave a truncated, empty JSON file behind.
    step1_summary["stat"]["step1_readspair"] = total
    step1_summary["stat"]["step1_available"] = available

    # Everything in the summary comes from json.load plus two Python ints, so
    # no custom ``default`` encoder hook is needed.
    with open(step1json, "w") as fh1:
        json.dump(step1_summary, fh1, indent=4)

# Run the click command only when this file is executed as a script; click
# parses the command-line options and passes them to cutreads().
if __name__ == '__main__':
    cutreads()

