/android/import-contacts

To get this branch, use:
bzr branch http://bzr.ed.am/android/import-contacts

« back to all changes in this revision

Viewing changes to src/org/waxworlds/edam/importcontacts/VCFImporter.java

  • Committer: edam
  • Date: 2011-03-19 20:33:09 UTC
  • Revision ID: edam@waxworlds.org-20110319203309-5dzfyqrxwk94jtin
- formatting: removed some double-indents on overrunning lines
- updated TODO and NEWS
- rewrote central logic of parser so it makes more sense, looks nicer and has a small optimisation (getting name and params from line only when necessary)
- optimised away the unnecessary multiple conversions of lines to US-ASCII
- rewrote line extraction from vCards so that we can look ahead for v3 folded lines
- added support for v3 folded lines

Show diffs side-by-side

added added

removed removed

1
1
/*
2
2
 * VCFImporter.java
3
3
 *
4
 
 * Copyright (C) 2009 to 2011 Tim Marston <edam@waxworlds.org>
 
4
 * Copyright (C) 2009 Tim Marston <edam@waxworlds.org>
5
5
 *
6
6
 * This file is part of the Import Contacts program (hereafter referred
7
7
 * to as "this program"). For more information, see
32
32
import java.io.IOException;
33
33
import java.io.UnsupportedEncodingException;
34
34
import java.nio.ByteBuffer;
35
 
import java.util.ArrayList;
36
35
import java.util.Arrays;
37
 
import java.util.HashMap;
38
36
import java.util.HashSet;
39
37
import java.util.Iterator;
40
38
import java.util.List;
41
 
import java.util.NoSuchElementException;
42
39
import java.util.Set;
43
40
import java.util.Vector;
44
41
import java.util.regex.Matcher;
45
42
import java.util.regex.Pattern;
46
 
 
47
 
import org.waxworlds.edam.importcontacts.Importer.ContactData.ExtraDetail;
 
43
import java.util.NoSuchElementException;
 
44
import java.lang.UnsupportedOperationException;
48
45
 
49
46
import android.content.SharedPreferences;
50
47
import android.provider.Contacts;
333
330
        private class VCard extends ContactData
334
331
        {
335
332
                private final static int NAMELEVEL_NONE = 0;
336
 
                private final static int NAMELEVEL_FN = 1;
337
 
                private final static int NAMELEVEL_N = 2;
338
 
 
339
 
                private final static int MULTILINE_NONE = 0;
340
 
                private final static int MULTILINE_ENCODED = 1; // v2.1 quoted-printable
341
 
                private final static int MULTILINE_ESCAPED = 2; // v2.1 \\CRLF
342
 
                private final static int MULTILINE_FOLDED = 3;  // v3.0 folding
 
333
                private final static int NAMELEVEL_ORG = 1;
 
334
                private final static int NAMELEVEL_FN = 2;
 
335
                private final static int NAMELEVEL_N = 3;
343
336
 
344
337
                private String _version = null;
345
338
                private Vector< ByteBuffer > _buffers = null;
346
339
                private int _name_level = NAMELEVEL_NONE;
347
 
                private int _parser_multiline_state = MULTILINE_NONE;
 
340
                private boolean _parser_in_encoded_multiline = false;
 
341
                private boolean _parser_in_folded_multiline = false;
348
342
                private String _parser_current_name_and_params = null;
349
343
                private String _parser_buffered_value_so_far = "";
350
 
                private String _cached_organisation = null;
351
 
                private String _cached_title = null;
352
344
 
353
345
                protected class UnencodeResult
354
346
                {
477
469
                                String name_and_params;
478
470
                                int pos;
479
471
 
480
 
                                if( _parser_multiline_state != MULTILINE_NONE )
 
472
                                if( _parser_in_encoded_multiline ||
 
473
                                        _parser_in_folded_multiline )
481
474
                                {
482
475
                                        // if we're currently in a multi-line value, use the stored
483
476
                                        // property name and parameters
484
477
                                        name_and_params = _parser_current_name_and_params;
485
478
 
486
 
                                        // skip some initial line characters, depending on the type
487
 
                                        // of multi-line we're handling
488
479
                                        pos = buffer.position();
489
 
                                        switch( _parser_multiline_state )
490
 
                                        {
491
 
                                        case MULTILINE_FOLDED:
 
480
 
 
481
                                        // for folded multi-lines, skip the single space at the
 
482
                                        // start of the next line
 
483
                                        if( _parser_in_folded_multiline )
492
484
                                                pos++;
493
 
                                                break;
494
 
                                        case MULTILINE_ENCODED:
 
485
 
 
486
                                        // else, this must be an encoded multi-line, so skip any
 
487
                                        // whitespace we find at the start of the next line
 
488
                                        else
495
489
                                                while( pos < buffer.limit() && (
496
490
                                                        buffer.get( pos ) == ' ' ||
497
491
                                                        buffer.get( pos ) == '\t' ) )
498
492
                                                {
499
493
                                                        pos++;
500
494
                                                }
501
 
                                                break;
502
 
                                        default:
503
 
                                                // do nothing
504
 
                                        }
505
 
 
506
 
                                        // take us out of multi-line so that we can re-detect that
507
 
                                        // this line is a multi-line or not
508
 
                                        _parser_multiline_state = MULTILINE_NONE;
509
495
                                }
510
496
                                else
511
497
                                {
566
552
//                                      unencoding_result = unencodeBase64( props[ 1 ], charset );
567
553
                                if( unencoding_result != null ) {
568
554
                                        value = unencoding_result.getBuffer();
569
 
                                        if( unencoding_result.isAnotherLineRequired() )
570
 
                                                _parser_multiline_state = MULTILINE_ENCODED;
 
555
                                        _parser_in_encoded_multiline =
 
556
                                                unencoding_result.isAnotherLineRequired();
571
557
                                }
572
558
 
573
559
                                // convert 8-bit ASCII charset to US-ASCII
585
571
                                        throw new ParseException( R.string.error_vcf_charset );
586
572
                                }
587
573
 
588
 
                                // for some entries that have semicolon-separated value parts,
589
 
                                // check to see if the value ends in an escape character, which
590
 
                                // indicates that we have a multi-line value
591
 
                                if( ( name_param_parts[ 0 ].equals( "N" ) ||
592
 
                                        name_param_parts[ 0 ].equals( "ORG" ) ||
593
 
                                        name_param_parts[ 0 ].equals( "ADR" ) ) &&
594
 
                                        doesStringEndInAnEscapeChar( string_value ) )
595
 
                                {
596
 
                                        _parser_multiline_state = MULTILINE_ESCAPED;
597
 
                                        string_value = string_value.substring( 0,
598
 
                                                string_value.length() - 1 );
599
 
                                }
600
 
 
601
574
                                // now we know whether we're in an encoding multi-line,
602
575
                                // determine if we're in a v3 folded multi-line or not
603
 
                                if( _parser_multiline_state == MULTILINE_NONE &&
604
 
                                        _version.equals( "3.0" ) && next_line_looks_folded )
 
576
                                _parser_in_folded_multiline = !_parser_in_encoded_multiline &&
 
577
                                        _version.equals( "3.0" ) && next_line_looks_folded;
 
578
 
 
579
                                // handle multi-line requests
 
580
                                if( _parser_in_encoded_multiline ||
 
581
                                        _parser_in_folded_multiline )
605
582
                                {
606
 
                                        _parser_multiline_state = MULTILINE_FOLDED;
607
 
                                }
608
 
 
609
 
                                // handle multi-lines by buffering them and parsing them when we
610
 
                                // are processing the last line in a multi-line sequence
611
 
                                if( _parser_multiline_state != MULTILINE_NONE ) {
612
583
                                        _parser_buffered_value_so_far += string_value;
613
584
                                        return;
614
585
                                }
 
586
 
 
587
                                // add on buffered multi-line content
615
588
                                String complete_value =
616
 
                                        ( _parser_buffered_value_so_far + string_value ).trim();
 
589
                                        _parser_buffered_value_so_far + string_value;
617
590
 
618
591
                                // ignore empty values
619
592
                                if( complete_value.length() < 1 ) return;
625
598
                                        parseFN( name_param_parts, complete_value );
626
599
                                else if( name_param_parts[ 0 ].equals( "ORG" ) )
627
600
                                        parseORG( name_param_parts, complete_value );
628
 
                                else if( name_param_parts[ 0 ].equals( "TITLE" ) )
629
 
                                        parseTITLE( name_param_parts, complete_value );
630
601
                                else if( name_param_parts[ 0 ].equals( "TEL" ) )
631
602
                                        parseTEL( name_param_parts, complete_value );
632
603
                                else if( name_param_parts[ 0 ].equals( "EMAIL" ) )
633
604
                                        parseEMAIL( name_param_parts, complete_value );
634
 
                                else if( name_param_parts[ 0 ].equals( "ADR" ) )
635
 
                                        parseADR( name_param_parts, complete_value );
636
 
                        }
637
 
                }
638
 
 
639
 
                private boolean doesStringEndInAnEscapeChar( String string )
640
 
                {
641
 
                        // count the number of backslashes at the end of the string
642
 
                        int count = 0;
643
 
                        for( int a = string.length() - 1; a >= 0; a-- )
644
 
                                if( string.charAt( a ) == '\\' )
645
 
                                        count++;
646
 
                                else
647
 
                                        break;
648
 
 
649
 
                        // if there are an even number of backslashes then the final one
650
 
                        // doesn't count
651
 
                        return ( count & 1 ) == 1;
652
 
                }
653
 
 
654
 
                private String[] splitValueBySemicolon( String value )
655
 
                {
656
 
                        // split string in to parts by semicolon
657
 
                        ArrayList< String > parts = new ArrayList< String >(
658
 
                                Arrays.asList( value.split(  ";" ) ) );
659
 
 
660
 
                        // go through parts
661
 
                        for( int a = 0; a < parts.size(); a++ )
662
 
                        {
663
 
                                String str = parts.get( a );
664
 
 
665
 
                                // look for parts that end in an escape character, but ignore
666
 
                                // the final part. We've already detected escape chars at the
667
 
                                // end of the final part in parseLine() and handled multi-lines
668
 
                                // accordingly.
669
 
                                if( a < parts.size() - 1 &&
670
 
                                        doesStringEndInAnEscapeChar( str ) )
671
 
                                {
672
 
                                        // join the next part to this part and remove the next part
673
 
                                        parts.set( a, str.substring( 0, str.length() - 1 ) +
674
 
                                                ';' + parts.get( a + 1 ) );
675
 
                                        parts.remove( a + 1 );
676
 
 
677
 
                                        // re-visit this part
678
 
                                        a--;
679
 
                                        continue;
680
 
                                }
681
 
 
682
 
                                // trim and replace string
683
 
                                str = str.trim();
684
 
                                parts.set( a, str );
685
 
                        }
686
 
 
687
 
                        String[] ret = new String[ parts.size() ];
688
 
                        return parts.toArray( ret );
 
605
                        }
689
606
                }
690
607
 
691
608
                private void parseN( String[] params, String value )
 
609
                        throws ParseException, SkipContactException,
 
610
                        AbortImportException
692
611
                {
693
612
                        // already got a better name?
694
613
                        if( _name_level >= NAMELEVEL_N ) return;
695
614
 
696
615
                        // get name parts
697
 
                        String[] name_parts = splitValueBySemicolon( value );
 
616
                        String[] name_parts = value.split( ";" );
 
617
                        for( int i = 0; i < name_parts.length; i++ )
 
618
                                name_parts[ i ] = name_parts[ i ].trim();
698
619
 
699
620
                        // build name
700
621
                        value = "";
706
627
                        // set name
707
628
                        setName( value );
708
629
                        _name_level = NAMELEVEL_N;
 
630
 
 
631
                        // check now to see if we need to import this contact (to avoid
 
632
                        // parsing the rest of the vCard unnecessarily)
 
633
                        if( !isImportRequired( getName() ) )
 
634
                                throw new SkipContactException();
709
635
                }
710
636
 
711
637
                private void parseFN( String[] params, String value )
 
638
                        throws ParseException, SkipContactException
712
639
                {
713
640
                        // already got a better name?
714
641
                        if( _name_level >= NAMELEVEL_FN ) return;
719
646
                }
720
647
 
721
648
                private void parseORG( String[] params, String value )
 
649
                        throws ParseException, SkipContactException
722
650
                {
 
651
                        // already got a better name?
 
652
                        if( _name_level >= NAMELEVEL_ORG ) return;
 
653
 
723
654
                        // get org parts
724
 
                        String[] org_parts = splitValueBySemicolon( value );
725
 
                        if( org_parts == null || org_parts.length < 1 ) return;
726
 
 
727
 
                        // build organisation name
728
 
                        StringBuilder builder = new StringBuilder(
729
 
                                String.valueOf( org_parts[ 0 ] ) );
730
 
                        for( int a = 1; a < org_parts.length; a++ )
731
 
                                builder.append( ", " ).append( org_parts[ a ] );
732
 
                        String organisation = builder.toString();
733
 
 
734
 
                        // set organisation name (using a title we've previously found)
735
 
                        addOrganisation( organisation, _cached_title, true );
736
 
 
737
 
                        // if we've not previously found a title, store this organisation
738
 
                        // name (we'll need it when we find a title to update the
739
 
                        // organisation, by name), else if we *have* previously found a
740
 
                        // title, clear it (since we just used it)
741
 
                        if( _cached_title == null )
742
 
                                _cached_organisation = organisation;
743
 
                        else
744
 
                                _cached_title = null;
745
 
                }
746
 
 
747
 
                private void parseTITLE( String[] params, String value )
748
 
                {
749
 
                        // if we previously had an organisation, look it up and append this
750
 
                        // title to it
751
 
                        if( _cached_organisation != null && hasOrganisations() ) {
752
 
                                HashMap< String, ExtraDetail > datas = getOrganisations();
753
 
                                ExtraDetail detail = datas.get( _cached_organisation );
754
 
                                if( detail != null )
755
 
                                        detail.setExtra( value );
756
 
                        }
757
 
 
758
 
                        // same as when handling organisation, if we've not previously found
759
 
                        // an organisation we store this title, else we clear it (since we
760
 
                        // just appended this title to it)
761
 
                        if( _cached_organisation == null )
762
 
                                _cached_title = value;
763
 
                        else
764
 
                                _cached_organisation = null;
 
655
                        String[] org_parts = value.split( ";" );
 
656
                        for( int i = 0; i < org_parts.length; i++ )
 
657
                                org_parts[ i ] = org_parts[ i ].trim();
 
658
 
 
659
                        // build name
 
660
                        if( org_parts.length > 1 && org_parts[ 0 ].length() == 0 )
 
661
                                value = org_parts[ 1 ];
 
662
                        else
 
663
                                value = org_parts[ 0 ];
 
664
 
 
665
                        // set name
 
666
                        setName( value );
 
667
                        _name_level = NAMELEVEL_ORG;
765
668
                }
766
669
 
767
670
                private void parseTEL( String[] params, String value )
 
671
                        throws ParseException
768
672
                {
769
673
                        if( value.length() == 0 ) return;
770
674
 
774
678
 
775
679
                        // here's the logic...
776
680
                        boolean preferred = types.contains( "PREF" );
777
 
                        int type;
 
681
                        int type = PhonesColumns.TYPE_MOBILE;
 
682
                        if( types.contains( "VOICE" ) )
 
683
                                if( types.contains( "WORK" ) )
 
684
                                        type = PhonesColumns.TYPE_WORK;
 
685
                                else
 
686
                                        type = PhonesColumns.TYPE_HOME;
 
687
                        else if( types.contains( "CELL" ) || types.contains( "VIDEO" ) )
 
688
                                type = PhonesColumns.TYPE_MOBILE;
778
689
                        if( types.contains( "FAX" ) )
779
690
                                if( types.contains( "HOME" ) )
780
691
                                        type = PhonesColumns.TYPE_FAX_HOME;
781
692
                                else
782
693
                                        type = PhonesColumns.TYPE_FAX_WORK;
783
 
                        else if( types.contains( "CELL" ) || types.contains( "VIDEO" ) )
784
 
                                type = PhonesColumns.TYPE_MOBILE;
785
 
                        else if( types.contains( "PAGER" ) )
 
694
                        if( types.contains( "PAGER" ) )
786
695
                                type = PhonesColumns.TYPE_PAGER;
787
 
                        else if( types.contains( "WORK" ) )
788
 
                                type = PhonesColumns.TYPE_WORK;
789
 
                        else
790
 
                                type = PhonesColumns.TYPE_HOME;
791
696
 
792
697
                        // add phone number
793
 
                        addNumber( value, type, preferred );
 
698
                        addPhone( value, type, preferred );
794
699
                }
795
700
 
796
701
                public void parseEMAIL( String[] params, String value )
 
702
                        throws ParseException
797
703
                {
798
704
                        if( value.length() == 0 ) return;
799
705
 
800
706
                        Set< String > types = extractTypes( params, Arrays.asList(
801
707
                                "PREF", "WORK", "HOME", "INTERNET" ) );
802
708
 
803
 
                        // add email address
 
709
                        // here's the logic...
804
710
                        boolean preferred = types.contains( "PREF" );
805
 
                        int type;
806
 
                        if( types.contains( "WORK" ) )
807
 
                                type = Contacts.ContactMethods.TYPE_WORK;
808
 
                        else
809
 
                                type = Contacts.ContactMethods.TYPE_HOME;
810
 
 
811
 
                        addEmail( value, type, preferred );
812
 
                }
813
 
 
814
 
                private void parseADR( String[] params, String value )
815
 
                {
816
 
                        // get address parts
817
 
                        String[] adr_parts = splitValueBySemicolon( value );
818
 
 
819
 
                        // build address
820
 
                        value = "";
821
 
                        for( int a = 0; a < adr_parts.length; a++ ) {
822
 
                                if( value.length() > 0 ) value += "\n";
823
 
                                value += adr_parts[ a ].trim();
824
 
                        }
825
 
 
826
 
                        Set< String > types = extractTypes( params, Arrays.asList(
827
 
                                "PREF", "WORK", "HOME", "INTERNET" ) );
828
 
 
829
 
                        // add address
830
 
                        int type;
831
 
                        if( types.contains( "WORK" ) )
832
 
                                type = Contacts.ContactMethods.TYPE_WORK;
833
 
                        else
834
 
                                type = Contacts.ContactMethods.TYPE_HOME;
835
 
 
836
 
                        addAddress( value, type );
 
711
                        if( types.contains( "WORK" ) )
 
712
                                addEmail( value, Contacts.ContactMethods.TYPE_WORK, preferred );
 
713
                        else
 
714
                                addEmail( value, Contacts.ContactMethods.TYPE_HOME, preferred );
837
715
                }
838
716
 
839
717
                public void finaliseParsing()
844
722
                        if( _version == null && _buffers != null )
845
723
                                throw new ParseException( R.string.error_vcf_malformed );
846
724
 
847
 
                        // check if we should import this contact
848
 
                        try {
849
 
                                if( !isImportRequired( this ) )
850
 
                                        throw new SkipContactException();
851
 
                        }
852
 
                        catch( ContactNeedsMoreInfoException e ) {
853
 
                                throw new ParseException( R.string.error_vcf_notenoughinfo );
854
 
                        }
 
725
                        //  missing name properties?
 
726
                        if( _name_level == NAMELEVEL_NONE )
 
727
                                throw new ParseException( R.string.error_vcf_noname );
 
728
 
 
729
                        // check if we should import this one? If we've already got an 'N'-
 
730
                        // type name, this will already have been done by parseN() so we
 
731
                        // mustn't do this here (or it could prompt twice!)
 
732
                        if( _name_level < NAMELEVEL_N && !isImportRequired( getName() ) )
 
733
                                throw new SkipContactException();
855
734
                }
856
735
 
857
736
                private String checkParam( String[] params, String name )