import numpy as np
from entropy_numba import calculate_entropy_similarity,calculate_spectral_entropy,calculate_unweighted_entropy_similarity

def parse_mgf_block(mgf_str, l2_normalize=False):
    """Parse the peak list of an MGF-formatted text block into a 2-D array.

    Blank lines, lines starting with ``BEGIN`` or ``END`` and any line
    containing ``=`` (metadata such as PEPMASS, CHARGE, ...) are ignored.
    Every remaining line whose first two whitespace-separated fields parse
    as floats contributes one peak. Extra trailing fields (e.g. an optional
    per-peak charge column) are ignored instead of causing the whole peak
    to be dropped. Lines that do not start with two numbers are skipped
    silently.

    Parameters
    ----------
    mgf_str : str
        Text of the MGF block.
    l2_normalize : bool, optional
        If True, divide the intensities by their Euclidean (L2) norm.
        Nothing is rescaled when there are no peaks or the norm is zero.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape ``(n_peaks, 2)``; column 0 holds m/z and
        column 1 holds intensity, in the order the peaks appear in the
        text. An input without peaks yields an array of shape ``(0, 2)``.
    """
    mzs = []
    intensities = []

    for raw_line in mgf_str.strip().splitlines():
        line = raw_line.strip()
        if not line or line.startswith(("BEGIN", "END")):
            continue
        if "=" in line:
            continue  # Skip metadata like PEPMASS, CHARGE, etc.

        fields = line.split()
        if len(fields) < 2:
            continue
        try:
            # Only the first two columns are m/z and intensity; anything
            # after them (such as a charge annotation) is not needed here.
            mz = float(fields[0])
            inten = float(fields[1])
        except ValueError:
            continue
        # Append only after both values parsed so the lists stay aligned.
        mzs.append(mz)
        intensities.append(inten)

    mzs = np.array(mzs, dtype=np.float32)
    intensities = np.array(intensities, dtype=np.float32)

    if l2_normalize and intensities.size > 0:
        norm = np.linalg.norm(intensities)
        if norm > 0:  # guard against division by zero for all-zero spectra
            intensities = intensities / norm

    peaks = np.column_stack((mzs, intensities))
    return peaks

# Query spectrum embedded as an MGF text block: KEY=VALUE metadata lines
# (SCANS, PEPMASS, CHARGE, COLLISION_ENERGY) followed by one tab-separated
# "m/z<TAB>intensity" pair per line. Parsed below with parse_mgf_block().
query_mgf = """BEGIN IONS
SCANS=23748
PEPMASS=223.06349
CHARGE=1
COLLISION_ENERGY=0.0
50.791916	3435.061523
55.055046	3908.976318
55.748184	3028.63916
56.259556	4268.980957
57.070576	9512.685547
69.07048	7972.945312
73.901649	3230.746826
80.442108	3023.831543
81.070023	3893.279785
83.086006	6937.650879
90.434692	2871.3396
93.070412	4153.100586
95.085617	5388.214844
103.593796	3313.668945
107.085709	7970.878906
113.754196	3909.355713
121.101074	6016.883301
123.080444	4487.064941
135.116409	4593.134277
149.023376	20133.859375
149.132339	6352.644043
191.000061	4246.229492
191.224289	3604.191406
192.981857	3114.447998
207.032196	3846.541748
222.986267	5720.391602
223.064133	18260.234375
223.111099	4997.09082
223.169724	5440.414551
223.205353	6825.792969
225.04274	284222.78125
225.080261	4607.435547
225.110291	6737.415039
227.022385	49683.398438
END IONS"""

# Reference spectrum embedded as an MGF text block, in the same layout as
# the query: KEY=VALUE metadata lines followed by one tab-separated
# "m/z<TAB>intensity" pair per line. Parsed below with parse_mgf_block().
reference_mgf = """BEGIN IONS
SCANS=28327
PEPMASS=381.24832
CHARGE=1
COLLISION_ENERGY=0.0
51.316906	3039.795654
57.034149	3960.914307
59.046772	2952.169434
59.04977	204441.84375
68.072746	3962.780762
68.486237	3288.236572
75.976875	3527.386475
81.070312	6722.994629
83.050056	4921.953613
85.065224	5887.714844
93.070175	4548.616699
95.085854	4804.586914
97.06543	5472.434082
99.005447	3987.838623
103.075638	29113.119141
105.070892	5807.663574
107.0858	5937.092285
109.064896	9199.212891
110.898888	3442.830322
112.338295	3229.999268
115.075775	5455.26416
117.091217	6091.44043
119.085617	5936.018066
121.100983	6680.05127
123.079857	5223.368652
131.070236	78652.664062
145.100998	6911.617676
147.080673	5672.179688
149.096802	5325.969238
157.1008	6968.431152
161.096298	4072.158691
164.942017	3747.665527
165.090759	4969.930176
173.093369	3439.692627
183.118286	5934.271973
187.112976	4640.814453
189.12709	3710.097656
191.107285	5058.105469
205.121185	4985.901855
215.1073	4079.940186
217.123856	5710.056152
219.136124	3913.680908
221.836182	7433.515625
271.169189	3809.697021
293.354034	3646.683594
299.16452	7004.865234
310.482147	3513.313232
335.188446	4969.879883
336.15213	3844.234619
346.834808	3584.11084
363.185608	3712.689453
381.189453	6185.338379
END IONS"""

# Turn both MGF blocks into peak arrays (query first, then reference),
# asking the parser to L2-normalise the intensities.
peaks_query, peaks_reference = (
    parse_mgf_block(mgf_text, l2_normalize=True)
    for mgf_text in (query_mgf, reference_mgf)
)

# Show the parsed peak arrays, one after the other.
print(peaks_query, peaks_reference, sep="\n")

# Entropy of the query spectrum on its own.
entropy = calculate_spectral_entropy(
    peaks_query,
    min_ms2_difference_in_da=0.01,
)
print(f"Spectral entropy is {entropy}.")

# Query-vs-reference similarity, unweighted variant.
unweighted_similarity = calculate_unweighted_entropy_similarity(
    peaks_query,
    peaks_reference,
    ms2_tolerance_in_da=0.01,
)
print(f"Unweighted entropy similarity: {unweighted_similarity}.")

# NOTE(review): clean_spectra=False presumably skips the library's own
# preprocessing, so the L2-normalised peaks are used exactly as parsed —
# confirm that this is the input entropy_numba expects.
similarity = calculate_entropy_similarity(
    peaks_query,
    peaks_reference,
    clean_spectra=False,
    ms2_tolerance_in_da=0.01,
)
print(f"Entropy similarity: {similarity}.")
