SPSS Statistics

SPSS Statistics

Your hub for statistical analysis, data management, and data documentation. Connect, learn, and share with your peers! 

 View Only

Splitting Files within SPSS

  • 1.  Splitting Files within SPSS

    Posted 9 days ago

    This syntax might be helpful for anyone attempting to split out files.  There are a few options below.

    * Encoding: UTF-8.

    *all python code needs to be run in unicode for v29 >, this includes jon peck's extensions.
    *PYTHON SPLIT *2.
    *##############################################################################.
    begin program.
    import os, pandas as pd
    a_frame = pd.read_spss('directory/active 5 19 2025.sav')
    os.makedirs("split_directory", exist_ok=True)
    os.chdir('split_directory')
    a_frame = a_frame.sort_values(by='meno')
    for value in a_frame['meno2'].unique():
        temp_file = a_frame[a_frame['meno2'] == value]
        temp_file.to_csv(f'{value} splitter.csv', index=False)
        temp_file.to_excel(f'{value} splitter.xlsx', index=False)
    end program.
    begin program.
    import os
    import pandas as pd
    a_frame = pd.read_spss('directory/active 5 19 2025.sav')
    os.makedirs("split_directory", exist_ok=True)
    os.chdir('split_directory')
    a_frame = a_frame.sort_values(by='meno')
    for value, group in a_frame.groupby('meno2'):
        group.to_csv(f'{value} splitter2.csv', index=False)
        group.to_excel(f'{value} splitter2.xlsx', index=False)
    end program.
    *##############################################################################.

    *SPSS EXTENSION HUB SPLIT AND PROCESS. V29 AND CALL .SPS FILES FROM SPLIT CONVERT.  For sps guidance see John Peck's documentation on SPSSinc process files. 
    *##############################################################################.
    dataset close all.
    get file='directory/active 5 19 2025.sav'.
    string slabel(a55).
    compute slabel=VALUELABEL(meno2).
    sort cases by meno.
    *file split.
    spssinc split dataset splitvar = slabel
    /OUTPUT DIRECTORY='split_directory' DELETECONTENTS = NO
    FILENAME = "${slabel} spssinc"
    /OPTIONS NAMES =VALUES PRINTLIST=YES.
    *process sav to csv.
    SPSSINC PROCESS FILES INPUTDATA='split_directory\*.sav'
    SYNTAX="sps_directory\ConvertSavToCSV.sps"
        OUTPUTDATADIR= "split_directory" OUTPUTDATATYPE=CSV.
    *process sav to xls.
    SPSSINC PROCESS FILES INPUTDATA='split_directory\*.sav'
    SYNTAX="sps_directory\ConvertSavToXLS.sps"
        OUTPUTDATADIR= "split_directory" OUTPUTDATATYPE=xls.
    *##############################################################################.


    *SPSS MACRO SPLIT CSV.
    *##############################################################################.
    dataset close all.
    get file='directory/active.sav'.
    string slabel(a55).
    compute slabel=VALUELABEL(meno2).
    sort cases by meno.
    SAVE OUTFILE='split_directory\mydata.sav'.

    * Now start the job.
    set mprint=on.

    *//////////////////////////////////////////.
    DEFINE !SPLIT (var=!TOKENS(1) /value=!TOKENS(1) )
    TEMPORARY.
    SELECT IF (!var=!value).
    SAVE TRANSLATE OUTFILE=!QUOTE(!CONCAT('split_directory\',!EVAL(!UNQUOTE(!value)),'.csv'))
      /TYPE=CSV
      /ENCODING='Locale'
      /MAP
      /REPLACE
      /FIELDNAMES
      /CELLS=LABELS.
    execute.
    !ENDDEFINE.
    *//////////////////////////////////////////.

    * Find all different existing values of cat1.
    AGGREGATE  /OUTFILE=*
      /BREAK=slabel  /notused = N.

    * Write a syntax file which will call the above macro.
    WRITE OUTFILE='split_directory\temp.sps'
     /'!SPLIT var=slabel value="'slabel'".'.
    Execute.

    * Get the original data file and do the macro calls.
    GET FILE='split_directory\mydata.sav'.
    insert file = 'split_directory\temp.sps'.
    *##############################################################################.

    *SPSS MACRO SPLIT XLSX.
    *##############################################################################.
    dataset close all.
    get file='directory/active 5 19 2025.sav'.
    string slabel(a55).
    compute slabel=VALUELABEL(meno2).
    sort cases by meno.
    SAVE OUTFILE='split_directory\mydata.sav'.

    * Now start the job.
    set mprint=on.

    *//////////////////////////////////////////.
    DEFINE !SPLIT (var=!TOKENS(1) /value=!TOKENS(1) )
    TEMPORARY.
    SELECT IF (!var=!value).

    SAVE TRANSLATE OUTFILE=!QUOTE(!CONCAT('split_directory\',!EVAL(!UNQUOTE(!value)),'.xlsx'))
      /TYPE=XLS
      /VERSION=12
      /MAP
      /FIELDNAMES VALUE=LABELS
      /CELLS=LABELS
      /EXCELOPTIONS SHEET='sheet1'
      /REPLACE.
    execute.
    !ENDDEFINE.


    * Find all different existing values of cat1.
    AGGREGATE  /OUTFILE=*
      /BREAK=slabel  /notused = N.

    * Write a syntax file which will call the above macro.
    WRITE OUTFILE='split_directory\temp.sps'
     /'!SPLIT var=slabel value="'slabel'".'.
    Execute.

    * Get the original data file and do the macro calls.
    GET FILE='split_directory\mydata.sav'.
    insert file = 'split_directory\temp.sps'.
    *##############################################################################.


    *PYTHON + SPSS MODULES HYBRID CSV.
    *##############################################################################.
    dataset close all.
    get file='directory/active 5 19 2025.sav'.
    string slabel(a55).
    compute slabel=VALUELABEL(meno2).
    sort cases by meno.
    BEGIN PROGRAM.
    import spss, spssdata, os
    splitvar = "slabel"  # Change to your split variable
    outdir = r"split_directory"
    # Get unique values for splitvar using spssdata
    with spssdata.Spssdata() as cursor:
        idx = cursor.namelist.index(splitvar)
        unique_vals = set(row[idx] for row in cursor)
    for v in unique_vals:
        v_quoted = f'"{v}"' if isinstance(v, str) else str(v)
        outfile = f"{outdir}/{str(v).strip()}_split88.csv"
        spss.Submit(f"""
        TEMPORARY.
        SELECT IF ({splitvar} = {v_quoted}).
        SAVE TRANSLATE OUTFILE='{outfile}'
          /TYPE=CSV
          /ENCODING='Locale'
          /MAP
          /REPLACE
          /FIELDNAMES
          /CELLS=LABELS.
    execute.
        """)
    END PROGRAM.
    *##############################################################################.

    *PYTHON + SPSS MODULES HYBRID XLSX.
    *##############################################################################.
    dataset close all.
    get file='directory/active 5 19 2025.sav'.
    string slabel(a55).
    compute slabel=VALUELABEL(meno2).
    sort cases by meno.
    BEGIN PROGRAM.
    import spss, spssdata, os
    splitvar = "slabel"  # Change to your split variable
    outdir = r"split_directory"
    # Get unique values for splitvar using spssdata
    with spssdata.Spssdata() as cursor:
        idx = cursor.namelist.index(splitvar)
        unique_vals = set(row[idx] for row in cursor)
    for v in unique_vals:
        v_quoted = f'"{v}"' if isinstance(v, str) else str(v)
        outfile = f"{outdir}/{str(v).strip()}_split88.xlsx"
        spss.Submit(f"""
        TEMPORARY.
        SELECT IF ({splitvar} = {v_quoted}).
        SAVE TRANSLATE OUTFILE='{outfile}'
          /TYPE=XLS
          /VERSION=12
          /MAP
          /FIELDNAMES VALUE=LABELS
          /CELLS=LABELS
          /EXCELOPTIONS SHEET='sheet1'
          /REPLACE.
    execute.
        """)
    END PROGRAM.
    *##############################################################################.
    *append below.

    dataset close all.
    get file='directory/active 5 19 2025.sav'.
    string slabel(a55).
    compute slabel=VALUELABEL(meno2).
    sort cases by meno.
    BEGIN PROGRAM.
    import spss, spssdata, os
    splitvar = "slabel"  # Change to your split variable
    outdir = r"split_directory"
    # Get unique values for splitvar using spssdata
    with spssdata.Spssdata() as cursor:
        idx = cursor.namelist.index(splitvar)
        unique_vals = set(row[idx] for row in cursor)
    for v in unique_vals:
        v_quoted = f'"{v}"' if isinstance(v, str) else str(v)
        outfile = f"{outdir}/{str(v).strip()}_split88.xlsx"
        spss.Submit(f"""
        TEMPORARY.
        SELECT IF ({splitvar} = {v_quoted}).
        SAVE TRANSLATE OUTFILE='{outfile}'
          /TYPE=XLS
          /VERSION=12
          /MAP
          /FIELDNAMES VALUE=LABELS
          /CELLS=LABELS
          /EXCELOPTIONS SHEET='sheet2'
          /append.
    execute.
        """)
    END PROGRAM.

    dataset close all.
    get file='directory/active 5 19 2025.sav'.
    string slabel(a55).
    compute slabel=VALUELABEL(meno2).
    sort cases by meno.
    BEGIN PROGRAM.
    import spss, spssdata, os
    splitvar = "slabel"  # Change to your split variable
    outdir = r"split_directory"
    # Get unique values for splitvar using spssdata
    with spssdata.Spssdata() as cursor:
        idx = cursor.namelist.index(splitvar)
        unique_vals = set(row[idx] for row in cursor)
    for v in unique_vals:
        v_quoted = f'"{v}"' if isinstance(v, str) else str(v)
        outfile = f"{outdir}/{str(v).strip()}_split88.xlsx"
        spss.Submit(f"""
        TEMPORARY.
        SELECT IF ({splitvar} = {v_quoted}).
        SAVE TRANSLATE OUTFILE='{outfile}'
          /TYPE=XLS
          /VERSION=12
          /MAP
          /FIELDNAMES VALUE=LABELS
          /CELLS=LABELS
          /EXCELOPTIONS SHEET='sheet3'
          /append.
    execute.
        """)
    END PROGRAM.



    ------------------------------
    Art Jack
    ------------------------------